diff --git a/plugins/GSdx_legacy/CMakeLists.txt b/plugins/GSdx_legacy/CMakeLists.txt deleted file mode 100644 index 247ef48e4e..0000000000 --- a/plugins/GSdx_legacy/CMakeLists.txt +++ /dev/null @@ -1,237 +0,0 @@ -# Check that people use the good file -if(NOT TOP_CMAKE_WAS_SOURCED) - message(FATAL_ERROR " - You did not 'cmake' the good CMakeLists.txt file. Use the one in the top dir. - It is advice to delete all wrongly generated cmake stuff => CMakeFiles & CMakeCache.txt") -endif() - - -# plugin name -set(Output GSdx-legacy-1.0.0) - -set(CommonFlags - -fno-operator-names # because Xbyak uses and()/xor()/or()/not() function - -fno-strict-aliasing - -Wno-unknown-pragmas - -Wno-parentheses - -Wunused-variable # __dummy variable need to be investigated - ) - -# The next two need to be looked at, but spam really badly in gcc 8. -# Largely class alignment in GSDevice.h and memcpy in GSVector*.h. -if(GCC_VERSION VERSION_EQUAL "8.0" OR GCC_VERSION VERSION_GREATER "8.0") - set(CommonFlags ${CommonFlags} - -Wno-packed-not-aligned - -Wno-class-memaccess - ) -endif() - -set(GSdxFinalFlags ${CommonFlags}) - -if(XDG_STD) - set(GSdxFinalFlags ${GSdxFinalFlags} -DXDG_STD) -endif() - -if(EGL_API AND EGL_FOUND) - set(GSdxFinalFlags ${GSdxFinalFlags} -DEGL_SUPPORTED) -endif() - -if(LIBLZMA_FOUND) - set(GSdxFinalFlags ${GSdxFinalFlags} -DLZMA_SUPPORTED) -endif() - -#Clang doesn't support a few common flags that GCC does. 
-if(NOT USE_CLANG) - set(GSdxFinalFlags ${GSdxFinalFlags} -fabi-version=6) -endif() - -set(GSdxSources - GLLoader.cpp - GLState.cpp - GPU.cpp - GPUDrawScanline.cpp - GPUDrawScanlineCodeGenerator.cpp - GPULocalMemory.cpp - GPURenderer.cpp - GPURendererSW.cpp - GPUSetupPrimCodeGenerator.cpp - GPUState.cpp - GS.cpp - GSAlignedClass.cpp - GSBlock.cpp - GSCapture.cpp - GSClut.cpp - GSCodeBuffer.cpp - GSCrc.cpp - GSDevice.cpp - GSDeviceOGL.cpp - GSDeviceSW.cpp - GSDeviceNull.cpp - GSDirtyRect.cpp - GSDrawingContext.cpp - GSDrawScanline.cpp - GSDrawScanlineCodeGenerator.cpp - GSDrawScanlineCodeGenerator.x86.avx.cpp - GSDrawScanlineCodeGenerator.x86.avx2.cpp - GSDrawScanlineCodeGenerator.x64.cpp - GSDrawScanlineCodeGenerator.x86.cpp - GSDrawScanlineCodeGenerator.x64.avx.cpp - GSDump.cpp - GSFunctionMap.cpp - GSLinuxDialog.cpp - GSLocalMemory.cpp - GSLzma.cpp - GSPerfMon.cpp - GSPng.cpp - GSRasterizer.cpp - GSRenderer.cpp - GSRendererCL.cpp - GSRendererHW.cpp - GSRendererNull.cpp - GSRendererOGL.cpp - GSRendererSW.cpp - GSSetting.cpp - GSSetupPrimCodeGenerator.cpp - GSSetupPrimCodeGenerator.x86.avx.cpp - GSSetupPrimCodeGenerator.x86.avx2.cpp - GSSetupPrimCodeGenerator.x64.avx.cpp - GSSetupPrimCodeGenerator.x86.cpp - GSSetupPrimCodeGenerator.x64.cpp - GSShaderOGL.cpp - GSState.cpp - GSTables.cpp - GSTexture.cpp - GSTextureCache.cpp - GSTextureCacheSW.cpp - GSTextureCacheOGL.cpp - GSTextureFXOGL.cpp - GSTextureOGL.cpp - GSTextureNull.cpp - GSTextureSW.cpp - GSThread.cpp - GSUtil.cpp - GSVector.cpp - GSVertexTrace.cpp - GSWnd.cpp - GSWndOGL.cpp - GSWndEGL.cpp - GSdx.cpp - stdafx.cpp - ) - -set(GSdxHeaders - GPU.h - GPUDrawScanline.h - GPUDrawScanlineCodeGenerator.h - GPUDrawingEnvironment.h - GPULocalMemory.h - GPURenderer.h - GPURendererSW.h - GPUScanlineEnvironment.h - GPUSetupPrimCodeGenerator.h - GPUState.h - GPUVertex.h - GS.h - GSAlignedClass.h - GSBlock.h - GSCapture.h - GSClut.h - GSCodeBuffer.h - GSCrc.h - GSDevice.h - GSDeviceOGL.h - GSDeviceNull.h - GSDirtyRect.h - 
GSDrawScanline.h - GSDrawScanlineCodeGenerator.h - GSDrawingContext.h - GSDrawingEnvironment.h - GSDump.h - GSFunctionMap.h - GSLinuxLogo.h - GSLocalMemory.h - GSPerfMon.h - GSRasterizer.h - GSRenderer.h - GSRendererNull.h - GSRendererSW.h - GSRendererHW.h - GSRendererOGL.h - GSScanlineEnvironment.h - GSSetting.h - GSSetupPrimCodeGenerator.h - GSState.h - GSTables.h - GSTexture.h - GSTextureCache.h - GSTextureCacheSW.h - GSTextureCacheOGL.h - GSTextureNull.h - GSThread.h - GSUtil.h - GSVector.h - GSVertex.h - GSVertexHW.h - GSVertexList.h - GSVertexSW.h - GSVertexTrace.h - GSWnd.h - GSWndOGL.h - GSWndEGL.h - GSdx.h - res/glsl_source.h - stdafx.h - xbyak/xbyak.h - xbyak/xbyak_bin2hex.h - xbyak/xbyak_mnemonic.h - xbyak/xbyak_util.h - ) - -set(GSdxFinalSources - ${GSdxSources} - ${GSdxHeaders} -) - -set(GSdxFinalLibs - ${X11_LIBRARIES} -) - -set(GSdxFinalLibs ${GSdxFinalLibs} - ${OPENGL_LIBRARIES} - ${GTK2_LIBRARIES} - ${LIBC_LIBRARIES} - ${PNG_LIBRARY} -) - -if(EGL_API AND EGL_FOUND) - set(GSdxFinalLibs ${GSdxFinalLibs} - ${EGL_LIBRARIES} - ) -endif() - -if(LIBLZMA_FOUND) - set(GSdxFinalLibs ${GSdxFinalLibs} - ${LIBLZMA_LIBRARIES} - ) -endif() - -# Generate Glsl header file. 
Protect with REBUILD_SHADER to avoid build-dependency on PERL -if (REBUILD_SHADER) - add_custom_command(OUTPUT res/glsl_source.h COMMAND perl ${CMAKE_SOURCE_DIR}/linux_various/glsl2h.pl) -endif() - -if(BUILTIN_GS) - add_pcsx2_lib(${Output} "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags}") -else() - add_pcsx2_plugin(${Output} "${GSdxFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags}") -endif() - -################################### Replay Loader -if(BUILD_REPLAY_LOADERS AND NOT GSdx) - set(Replay pcsx2_GSReplayLoader) - set(GSdxReplayLoaderFinalSources - ${GSdxFinalSources} - linux_replay.cpp - ) - add_pcsx2_executable(${Replay} "${GSdxReplayLoaderFinalSources}" "${GSdxFinalLibs}" "${GSdxFinalFlags}") -endif() diff --git a/plugins/GSdx_legacy/GLLoader.cpp b/plugins/GSdx_legacy/GLLoader.cpp deleted file mode 100644 index 680f19fab1..0000000000 --- a/plugins/GSdx_legacy/GLLoader.cpp +++ /dev/null @@ -1,532 +0,0 @@ -/* * Copyright (C) 2011-2014 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GLLoader.h" -#include "GSdx.h" - -PFNGLACTIVETEXTUREPROC gl_ActiveTexture = NULL; -PFNGLBLENDCOLORPROC gl_BlendColor = NULL; - -PFNGLATTACHSHADERPROC glAttachShader = NULL; -PFNGLBINDBUFFERPROC glBindBuffer = NULL; -PFNGLBINDBUFFERBASEPROC glBindBufferBase = NULL; -PFNGLBINDBUFFERRANGEPROC glBindBufferRange = NULL; -PFNGLBINDFRAMEBUFFERPROC glBindFramebuffer = NULL; -PFNGLBINDSAMPLERPROC glBindSampler = NULL; -PFNGLBINDVERTEXARRAYPROC glBindVertexArray = NULL; -PFNGLBLENDEQUATIONSEPARATEIARBPROC glBlendEquationSeparateiARB = NULL; -PFNGLBLENDFUNCSEPARATEIARBPROC glBlendFuncSeparateiARB = NULL; -PFNGLBLITFRAMEBUFFERPROC glBlitFramebuffer = NULL; -PFNGLBUFFERDATAPROC glBufferData = NULL; -PFNGLCHECKFRAMEBUFFERSTATUSPROC glCheckFramebufferStatus = NULL; -PFNGLCLEARBUFFERFVPROC glClearBufferfv = NULL; -PFNGLCLEARBUFFERIVPROC glClearBufferiv = NULL; -PFNGLCLEARBUFFERUIVPROC glClearBufferuiv = NULL; -PFNGLCOLORMASKIPROC glColorMaski = NULL; -PFNGLCOMPILESHADERPROC glCompileShader = NULL; -PFNGLCREATEPROGRAMPROC glCreateProgram = NULL; -PFNGLCREATESHADERPROC glCreateShader = NULL; -PFNGLCREATESHADERPROGRAMVPROC glCreateShaderProgramv = NULL; -PFNGLDELETEBUFFERSPROC glDeleteBuffers = NULL; -PFNGLDELETEFRAMEBUFFERSPROC glDeleteFramebuffers = NULL; -PFNGLDELETEPROGRAMPROC glDeleteProgram = NULL; -PFNGLDELETESAMPLERSPROC glDeleteSamplers = NULL; -PFNGLDELETESHADERPROC glDeleteShader = NULL; -PFNGLDELETEVERTEXARRAYSPROC glDeleteVertexArrays = NULL; -PFNGLDETACHSHADERPROC glDetachShader = NULL; -PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL; -PFNGLDRAWELEMENTSBASEVERTEXPROC glDrawElementsBaseVertex = NULL; -PFNGLENABLEVERTEXATTRIBARRAYPROC glEnableVertexAttribArray = NULL; -PFNGLFRAMEBUFFERRENDERBUFFERPROC glFramebufferRenderbuffer = NULL; -PFNGLFRAMEBUFFERTEXTURE2DPROC glFramebufferTexture2D = NULL; -PFNGLGENBUFFERSPROC glGenBuffers = NULL; -PFNGLGENFRAMEBUFFERSPROC glGenFramebuffers = NULL; 
-PFNGLGENSAMPLERSPROC glGenSamplers = NULL; -PFNGLGENVERTEXARRAYSPROC glGenVertexArrays = NULL; -PFNGLGETBUFFERPARAMETERIVPROC glGetBufferParameteriv = NULL; -PFNGLGETDEBUGMESSAGELOGARBPROC glGetDebugMessageLogARB = NULL; -PFNGLDEBUGMESSAGECALLBACKPROC glDebugMessageCallback = NULL; -PFNGLGETPROGRAMINFOLOGPROC glGetProgramInfoLog = NULL; -PFNGLGETPROGRAMIVPROC glGetProgramiv = NULL; -PFNGLGETSHADERIVPROC glGetShaderiv = NULL; -PFNGLGETSTRINGIPROC glGetStringi = NULL; -PFNGLISFRAMEBUFFERPROC glIsFramebuffer = NULL; -PFNGLLINKPROGRAMPROC glLinkProgram = NULL; -PFNGLMAPBUFFERPROC glMapBuffer = NULL; -PFNGLMAPBUFFERRANGEPROC glMapBufferRange = NULL; -PFNGLPROGRAMPARAMETERIPROC glProgramParameteri = NULL; -PFNGLSAMPLERPARAMETERFPROC glSamplerParameterf = NULL; -PFNGLSAMPLERPARAMETERIPROC glSamplerParameteri = NULL; -PFNGLSHADERSOURCEPROC glShaderSource = NULL; -PFNGLUNIFORM1IPROC glUniform1i = NULL; -PFNGLUNMAPBUFFERPROC glUnmapBuffer = NULL; -PFNGLUSEPROGRAMSTAGESPROC glUseProgramStages = NULL; -PFNGLVERTEXATTRIBIPOINTERPROC glVertexAttribIPointer = NULL; -PFNGLVERTEXATTRIBPOINTERPROC glVertexAttribPointer = NULL; -PFNGLBUFFERSUBDATAPROC glBufferSubData = NULL; -PFNGLFENCESYNCPROC glFenceSync = NULL; -PFNGLDELETESYNCPROC glDeleteSync = NULL; -PFNGLCLIENTWAITSYNCPROC glClientWaitSync = NULL; -PFNGLFLUSHMAPPEDBUFFERRANGEPROC glFlushMappedBufferRange = NULL; -PFNGLBLENDEQUATIONSEPARATEPROC glBlendEquationSeparate = NULL; -PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate = NULL; -// Query object -PFNGLBEGINQUERYPROC glBeginQuery = NULL; -PFNGLENDQUERYPROC glEndQuery = NULL; -PFNGLGETQUERYIVPROC glGetQueryiv = NULL; -PFNGLGETQUERYOBJECTIVPROC glGetQueryObjectiv = NULL; -PFNGLGETQUERYOBJECTUIVPROC glGetQueryObjectuiv = NULL; -PFNGLQUERYCOUNTERPROC glQueryCounter = NULL; -PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v = NULL; -PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v = NULL; -PFNGLGETINTEGER64VPROC glGetInteger64v = NULL; -// GL4.0 -// GL4.1 
-PFNGLBINDPROGRAMPIPELINEPROC glBindProgramPipeline = NULL; -PFNGLGENPROGRAMPIPELINESPROC glGenProgramPipelines = NULL; -PFNGLDELETEPROGRAMPIPELINESPROC glDeleteProgramPipelines = NULL; -PFNGLGETPROGRAMPIPELINEIVPROC glGetProgramPipelineiv = NULL; -PFNGLVALIDATEPROGRAMPIPELINEPROC glValidateProgramPipeline = NULL; -PFNGLGETPROGRAMPIPELINEINFOLOGPROC glGetProgramPipelineInfoLog = NULL; -PFNGLGETPROGRAMBINARYPROC glGetProgramBinary = NULL; -PFNGLVIEWPORTINDEXEDFPROC glViewportIndexedf = NULL; -PFNGLVIEWPORTINDEXEDFVPROC glViewportIndexedfv = NULL; -PFNGLSCISSORINDEXEDPROC glScissorIndexed = NULL; -PFNGLSCISSORINDEXEDVPROC glScissorIndexedv = NULL; -// NO GL4.1 -PFNGLUSEPROGRAMPROC glUseProgram = NULL; -PFNGLGETSHADERINFOLOGPROC glGetShaderInfoLog = NULL; -PFNGLPROGRAMUNIFORM1IPROC glProgramUniform1i = NULL; -// GL4.3 -PFNGLCOPYIMAGESUBDATAPROC glCopyImageSubData = NULL; -PFNGLINVALIDATETEXIMAGEPROC glInvalidateTexImage = NULL; -PFNGLPUSHDEBUGGROUPPROC glPushDebugGroup = NULL; -PFNGLPOPDEBUGGROUPPROC glPopDebugGroup = NULL; -PFNGLDEBUGMESSAGEINSERTPROC glDebugMessageInsert = NULL; -PFNGLDEBUGMESSAGECONTROLPROC glDebugMessageControl = NULL; -// GL4.2 -PFNGLBINDIMAGETEXTUREPROC glBindImageTexture = NULL; -PFNGLMEMORYBARRIERPROC glMemoryBarrier = NULL; -PFNGLTEXSTORAGE2DPROC glTexStorage2D = NULL; -// GL4.4 -PFNGLCLEARTEXIMAGEPROC glClearTexImage = NULL; -PFNGLBUFFERSTORAGEPROC glBufferStorage = NULL; - -// GL4.5 -PFNGLCREATETEXTURESPROC glCreateTextures = NULL; -PFNGLTEXTURESTORAGE2DPROC glTextureStorage2D = NULL; -PFNGLTEXTURESUBIMAGE2DPROC glTextureSubImage2D = NULL; -PFNGLCOPYTEXTURESUBIMAGE2DPROC glCopyTextureSubImage2D = NULL; -PFNGLBINDTEXTUREUNITPROC glBindTextureUnit = NULL; -PFNGLGETTEXTUREIMAGEPROC glGetTextureImage = NULL; -PFNGLTEXTUREPARAMETERIPROC glTextureParameteri = NULL; - -PFNGLCREATEFRAMEBUFFERSPROC glCreateFramebuffers = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERFVPROC glClearNamedFramebufferfv = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERIVPROC 
glClearNamedFramebufferiv = NULL; -PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC glClearNamedFramebufferuiv = NULL; -PFNGLNAMEDFRAMEBUFFERTEXTUREPROC glNamedFramebufferTexture = NULL; -PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC glNamedFramebufferDrawBuffers = NULL; -PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC glNamedFramebufferReadBuffer = NULL; -PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC glCheckNamedFramebufferStatus = NULL; - -PFNGLCREATEBUFFERSPROC glCreateBuffers = NULL; -PFNGLNAMEDBUFFERSTORAGEPROC glNamedBufferStorage = NULL; -PFNGLNAMEDBUFFERDATAPROC glNamedBufferData = NULL; -PFNGLNAMEDBUFFERSUBDATAPROC glNamedBufferSubData = NULL; -PFNGLMAPNAMEDBUFFERPROC glMapNamedBuffer = NULL; -PFNGLMAPNAMEDBUFFERRANGEPROC glMapNamedBufferRange = NULL; -PFNGLUNMAPNAMEDBUFFERPROC glUnmapNamedBuffer = NULL; -PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC glFlushMappedNamedBufferRange = NULL; - -PFNGLCREATESAMPLERSPROC glCreateSamplers = NULL; -PFNGLCREATEPROGRAMPIPELINESPROC glCreateProgramPipelines = NULL; - -PFNGLCLIPCONTROLPROC glClipControl = NULL; -PFNGLTEXTUREBARRIERPROC glTextureBarrier = NULL; - -namespace Emulate_DSA { - // Texture entry point - void APIENTRY BindTextureUnit(GLuint unit, GLuint texture) { - gl_ActiveTexture(GL_TEXTURE0 + unit); - glBindTexture(GL_TEXTURE_2D, texture); - } - - void APIENTRY CreateTexture(GLenum target, GLsizei n, GLuint *textures) { - glGenTextures(1, textures); - } - - void APIENTRY TextureStorage(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height) { - BindTextureUnit(7, texture); - glTexStorage2D(GL_TEXTURE_2D, levels, internalformat, width, height); - } - - void APIENTRY TextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels) { - BindTextureUnit(7, texture); - glTexSubImage2D(GL_TEXTURE_2D, level, xoffset, yoffset, width, height, format, type, pixels); - } - - void APIENTRY CopyTextureSubImage(GLuint texture, GLint level, GLint xoffset, 
GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height) { - BindTextureUnit(7, texture); - glCopyTexSubImage2D(GL_TEXTURE_2D, level, xoffset, yoffset, x, y, width, height); - } - - void APIENTRY GetTexureImage(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels) { - BindTextureUnit(7, texture); - glGetTexImage(GL_TEXTURE_2D, level, format, type, pixels); - } - - void APIENTRY TextureParameteri (GLuint texture, GLenum pname, GLint param) { - BindTextureUnit(7, texture); - glTexParameteri(GL_TEXTURE_2D, pname, param); - } - - // Framebuffer entry point - GLenum fb_target = 0; - void SetFramebufferTarget(GLenum target) { - fb_target = target; - } - - void APIENTRY CreateFramebuffers(GLsizei n, GLuint *framebuffers) { - glGenFramebuffers(n, framebuffers); - } - - void APIENTRY ClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value) { - glBindFramebuffer(fb_target, framebuffer); - glClearBufferfv(buffer, drawbuffer, value); - } - - void APIENTRY ClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value) { - glBindFramebuffer(fb_target, framebuffer); - glClearBufferiv(buffer, drawbuffer, value); - } - - void APIENTRY ClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value) { - glBindFramebuffer(fb_target, framebuffer); - glClearBufferuiv(buffer, drawbuffer, value); - } - - void APIENTRY NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level) { - glBindFramebuffer(fb_target, framebuffer); - glFramebufferTexture2D(fb_target, attachment, GL_TEXTURE_2D, texture, level); - } - - void APIENTRY NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, const GLenum *bufs) { - glBindFramebuffer(fb_target, framebuffer); - glDrawBuffers(n, bufs); - } - - void APIENTRY NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) { - glBindFramebuffer(fb_target, framebuffer); - 
glReadBuffer(src); - glBindFramebuffer(fb_target, 0); - } - - GLenum APIENTRY CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) { - glBindFramebuffer(fb_target, framebuffer); - return glCheckFramebufferStatus(fb_target); - } - - // Buffer entry point - GLenum buffer_target = 0; - void SetBufferTarget(GLenum target) { - buffer_target = target; - } - - void APIENTRY CreateBuffers(GLsizei n, GLuint *buffers) { - glGenBuffers(1, buffers); - } - - void APIENTRY NamedBufferStorage(GLuint buffer, buffer_proc_t size, const void *data, GLbitfield flags) { - glBindBuffer(buffer_target, buffer); - glBufferStorage(buffer_target, size, data, flags); - } - - void APIENTRY NamedBufferData(GLuint buffer, buffer_proc_t size, const void *data, GLenum usage) { - glBindBuffer(buffer_target, buffer); - glBufferData(buffer_target, size, data, usage); - } - - void APIENTRY NamedBufferSubData(GLuint buffer, GLintptr offset, buffer_proc_t size, const void *data) { - glBindBuffer(buffer_target, buffer); - glBufferSubData(buffer_target, offset, size, data); - } - - void *APIENTRY MapNamedBuffer(GLuint buffer, GLenum access) { - glBindBuffer(buffer_target, buffer); - return glMapBuffer(buffer_target, access); - } - - void *APIENTRY MapNamedBufferRange(GLuint buffer, GLintptr offset, buffer_proc_t length, GLbitfield access) { - glBindBuffer(buffer_target, buffer); - return glMapBufferRange(buffer_target, offset, length, access); - } - - GLboolean APIENTRY UnmapNamedBuffer(GLuint buffer) { - glBindBuffer(buffer_target, buffer); - return glUnmapBuffer(buffer_target); - } - - void APIENTRY FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, buffer_proc_t length) { - glBindBuffer(buffer_target, buffer); - glFlushMappedBufferRange(buffer_target, offset, length); - } - - // Misc entry point - // (only purpose is to have a consistent API otherwise it is useless) - void APIENTRY CreateProgramPipelines(GLsizei n, GLuint *pipelines) { - glGenProgramPipelines(n, pipelines); - } - - 
void APIENTRY CreateSamplers(GLsizei n, GLuint *samplers) { - glGenSamplers(n, samplers); - } - - // Replace function pointer to emulate DSA behavior - void Init() { - fprintf(stderr, "DSA is not supported. Replacing the GL function pointer to emulate it\n"); - glBindTextureUnit = BindTextureUnit; - glCreateTextures = CreateTexture; - glTextureStorage2D = TextureStorage; - glTextureSubImage2D = TextureSubImage; - glCopyTextureSubImage2D = CopyTextureSubImage; - glGetTextureImage = GetTexureImage; - glTextureParameteri = TextureParameteri; - - glCreateFramebuffers = CreateFramebuffers; - glClearNamedFramebufferfv = ClearNamedFramebufferfv; - glClearNamedFramebufferiv = ClearNamedFramebufferiv; - glClearNamedFramebufferuiv = ClearNamedFramebufferuiv; - glNamedFramebufferDrawBuffers = NamedFramebufferDrawBuffers; - glNamedFramebufferReadBuffer = NamedFramebufferReadBuffer; - glCheckNamedFramebufferStatus = CheckNamedFramebufferStatus; - - glCreateBuffers = CreateBuffers; - glNamedBufferStorage = NamedBufferStorage; - glNamedBufferData = NamedBufferData; - glNamedBufferSubData = NamedBufferSubData; - glMapNamedBuffer = MapNamedBuffer; - glMapNamedBufferRange = MapNamedBufferRange; - glUnmapNamedBuffer = UnmapNamedBuffer; - glFlushMappedNamedBufferRange = FlushMappedNamedBufferRange; - - glCreateProgramPipelines = CreateProgramPipelines; - glCreateSamplers = CreateSamplers; - } -} - -namespace GLLoader { - - bool fglrx_buggy_driver = false; - bool mesa_amd_buggy_driver = false; - bool nvidia_buggy_driver = false; - bool intel_buggy_driver = false; - bool in_replayer = false; - - - bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default - bool found_GL_EXT_texture_filter_anisotropic = false; - bool found_GL_ARB_clear_texture = false; // Miss AMD Mesa (otherwise seems SW) - // DX10 GPU limited driver (SW) - bool found_GL_ARB_copy_image = false; - bool found_GL_ARB_texture_barrier = false; - bool found_GL_ARB_clip_control = false; - 
bool found_GL_ARB_direct_state_access = false; - bool found_GL_ARB_separate_shader_objects = false; // Issue with Catalyst... - bool found_GL_ARB_buffer_storage = false; - // DX11 GPU - bool found_GL_ARB_draw_buffers_blend = false; // Not supported on AMD R600 (80 nm class chip, HD2900). Nvidia requires FERMI. Intel SB - bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge - bool found_GL_ARB_shader_image_load_store = false; // Intel IB. Nvidia/AMD miss Mesa implementation. - bool found_GL_ARB_viewport_array = false; // Intel IB. AMD/NVIDIA DX10 - - // Mandatory - bool found_GL_ARB_texture_storage = false; - bool found_GL_ARB_shading_language_420pack = false; - - static bool status_and_override(bool& found, const std::string& name, bool mandatory = false) - { - if (mandatory) { - if (!found) { - fprintf(stderr, "ERROR: %s is NOT SUPPORTED\n", name.c_str()); - } - return found; - } - - if (!found) { - fprintf(stdout, "INFO: %s is NOT SUPPORTED\n", name.c_str()); - } else { - fprintf(stdout, "INFO: %s is available\n", name.c_str()); - } - - std::string opt("override_"); - opt += name; - - if (theApp.GetConfig(opt.c_str(), -1) != -1) { - found = !!theApp.GetConfig(opt.c_str(), -1); - fprintf(stderr, "Override %s detection (%s)\n", name.c_str(), found ? "Enabled" : "Disabled"); - } - - return true; - } - - bool check_gl_version(int major, int minor) { - - const GLubyte* s = glGetString(GL_VERSION); - if (s == NULL) { - fprintf(stderr, "Error: GLLoader failed to get GL version\n"); - return false; - } - GLuint v = 1; - while (s[v] != '\0' && s[v-1] != ' ') v++; - - const char* vendor = (const char*)glGetString(GL_VENDOR); - fprintf(stdout, "OpenGL information. GPU: %s. Vendor: %s. Driver: %s\n", glGetString(GL_RENDERER), vendor, &s[v]); - - // Name changed but driver is still bad! 
- if (strstr(vendor, "ATI") || strstr(vendor, "Advanced Micro Devices")) - fglrx_buggy_driver = true; - if (strstr(vendor, "NVIDIA Corporation")) - nvidia_buggy_driver = true; - if (strstr(vendor, "Intel")) - intel_buggy_driver = true; - if (strstr(vendor, "X.Org") || strstr(vendor, "nouveau")) // Note: it might actually catch nouveau too, but bugs are likely to be the same anyway - mesa_amd_buggy_driver = true; - if (strstr(vendor, "VMware")) // Assume worst case because I don't know the real status - mesa_amd_buggy_driver = intel_buggy_driver = true; - - if (mesa_amd_buggy_driver) { - fprintf(stderr, "Buggy driver detected. Geometry shaders will be disabled\n"); - found_geometry_shader = false; - } - if (theApp.GetConfig("override_geometry_shader", -1) != -1) { - found_geometry_shader = !!theApp.GetConfig("override_geometry_shader", -1); - fprintf(stderr, "Overriding geometry shaders detection\n"); - } - - GLint major_gl = 0; - GLint minor_gl = 0; - glGetIntegerv(GL_MAJOR_VERSION, &major_gl); - glGetIntegerv(GL_MINOR_VERSION, &minor_gl); - if ( (major_gl < major) || ( major_gl == major && minor_gl < minor ) ) { - fprintf(stderr, "OpenGL %d.%d is not supported. 
Only OpenGL %d.%d\n was found", major, minor, major_gl, minor_gl); - return false; - } - - return true; - } - - bool check_gl_supported_extension() { - int max_ext = 0; - glGetIntegerv(GL_NUM_EXTENSIONS, &max_ext); - - if (glGetStringi && max_ext) { - for (GLint i = 0; i < max_ext; i++) { - string ext((const char*)glGetStringi(GL_EXTENSIONS, i)); - // Bonus - if (ext.compare("GL_EXT_texture_filter_anisotropic") == 0) found_GL_EXT_texture_filter_anisotropic = true; - // GL4.0 - if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true; - if (ext.compare("GL_ARB_draw_buffers_blend") == 0) found_GL_ARB_draw_buffers_blend = true; - // GL4.1 - if (ext.compare("GL_ARB_viewport_array") == 0) found_GL_ARB_viewport_array = true; - if (ext.compare("GL_ARB_separate_shader_objects") == 0) { - if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true; - else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n" -#ifdef __linux__ - "Note the extension will be fixed on Mesa 11.2 or 11.1.2.\n" -#endif - "AMD proprietary driver => https://community.amd.com/thread/194895\n" - "If you want to try it, you can set the variable override_GL_ARB_separate_shader_objects to 1 in the ini file\n"); - } - // GL4.2 - if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true; - if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true; - if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true; - // GL4.3 - if (ext.compare("GL_ARB_copy_image") == 0) found_GL_ARB_copy_image = true; - // GL4.4 - if (ext.compare("GL_ARB_buffer_storage") == 0) found_GL_ARB_buffer_storage = true; - if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true; - // GL4.5 - if (ext.compare("GL_ARB_direct_state_access") == 0) found_GL_ARB_direct_state_access = true; - if 
(ext.compare("GL_ARB_clip_control") == 0) found_GL_ARB_clip_control = true; - if (ext.compare("GL_ARB_texture_barrier") == 0) found_GL_ARB_texture_barrier = true; - - //fprintf(stderr, "DEBUG ext: %s\n", ext.c_str()); - } - } - - bool status = true; - - // Bonus - status &= status_and_override(found_GL_EXT_texture_filter_anisotropic, "GL_EXT_texture_filter_anisotropic"); - // GL4.0 - status &= status_and_override(found_GL_ARB_gpu_shader5, "GL_ARB_gpu_shader5"); - status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend"); - // GL4.1 - status &= status_and_override(found_GL_ARB_viewport_array, "GL_ARB_viewport_array"); - status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects"); - // GL4.2 - status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store"); - status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true); - status &= status_and_override(found_GL_ARB_texture_storage, "GL_ARB_texture_storage", true); - // GL4.3 - status &= status_and_override(found_GL_ARB_copy_image, "GL_ARB_copy_image"); - // GL4.4 - status &= status_and_override(found_GL_ARB_buffer_storage,"GL_ARB_buffer_storage"); - status &= status_and_override(found_GL_ARB_clear_texture,"GL_ARB_clear_texture"); - // GL4.5 - status &= status_and_override(found_GL_ARB_clip_control, "GL_ARB_clip_control"); - status &= status_and_override(found_GL_ARB_direct_state_access, "GL_ARB_direct_state_access"); - status &= status_and_override(found_GL_ARB_texture_barrier, "GL_ARB_texture_barrier"); - - if (!found_GL_ARB_direct_state_access) { - Emulate_DSA::Init(); - } - if (glBindTextureUnit == NULL) { - fprintf(stderr, "FATAL ERROR !!!! Failed to setup DSA function pointer!!!\n"); - status = false; - } - - if (!found_GL_ARB_texture_barrier) { - fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. 
You can't emulate correctly the GS blending unit! Sorry!\n"); - theApp.SetConfig("accurate_blending_unit", 0); - theApp.SetConfig("accurate_date", 0); - } - -#ifdef _WIN32 - if (status) { - if (intel_buggy_driver) { - fprintf(stderr, "OpenGL renderer isn't compatible with SandyBridge/IvyBridge GPU due to issues. Sorry.\n" - "Tip:Try it on Linux"); - } - if (fglrx_buggy_driver) { - fprintf(stderr, "OpenGL renderer is slow on AMD GPU due to inefficient driver. Sorry."); - } - } -#endif - - fprintf(stdout, "\n"); - - return status; - } -} diff --git a/plugins/GSdx_legacy/GLLoader.h b/plugins/GSdx_legacy/GLLoader.h deleted file mode 100644 index 2d4da9a98c..0000000000 --- a/plugins/GSdx_legacy/GLLoader.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright (C) 2011-2014 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#define GL_TEX_LEVEL_0 (0) -#define GL_TEX_LEVEL_1 (1) -#define GL_FB_DEFAULT (0) -#define GL_BUFFER_0 (0) - -#ifndef GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR -#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008 -#endif - -// FIX compilation issue with Mesa 10 -// Note it might be possible to do better with the right include -// in the rigth order but I don't have time -#ifndef APIENTRY -#define APIENTRY -#endif -#ifndef APIENTRYP -#define APIENTRYP APIENTRY * -#endif - -// Mesa glext.h < 20150122 uses GLsizei for BUFFER*PROCs -#if GL_GLEXT_VERSION < 20150122 -typedef GLsizei buffer_proc_t; -#else -typedef GLsizeiptr buffer_proc_t; -#endif - -// Allow compilation with older mesa -#ifndef GL_VERSION_4_3 -#define GL_VERSION_4_3 1 -typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam); -#endif - -#ifndef GL_ARB_copy_image -#define GL_ARB_copy_image 1 -#ifdef GL_GLEXT_PROTOTYPES -GLAPI void APIENTRY glCopyImageSubData (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); -#endif /* GL_GLEXT_PROTOTYPES */ -typedef void (APIENTRYP PFNGLCOPYIMAGESUBDATAPROC) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); -#endif - -#ifndef GL_VERSION_4_4 -#define GL_VERSION_4_4 1 -#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5 -#define GL_MAP_PERSISTENT_BIT 0x0040 -#define GL_MAP_COHERENT_BIT 0x0080 -#define GL_DYNAMIC_STORAGE_BIT 0x0100 -#define GL_CLIENT_STORAGE_BIT 0x0200 -#define GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT 0x00004000 -#define GL_BUFFER_IMMUTABLE_STORAGE 0x821F -#define GL_BUFFER_STORAGE_FLAGS 0x8220 -#define 
GL_CLEAR_TEXTURE 0x9365 -#define GL_LOCATION_COMPONENT 0x934A -#define GL_TRANSFORM_FEEDBACK_BUFFER_INDEX 0x934B -#define GL_TRANSFORM_FEEDBACK_BUFFER_STRIDE 0x934C -#define GL_QUERY_BUFFER 0x9192 -#define GL_QUERY_BUFFER_BARRIER_BIT 0x00008000 -#define GL_QUERY_BUFFER_BINDING 0x9193 -#define GL_QUERY_RESULT_NO_WAIT 0x9194 -#define GL_MIRROR_CLAMP_TO_EDGE 0x8743 -typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags); -typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLBINDBUFFERSBASEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers); -typedef void (APIENTRYP PFNGLBINDBUFFERSRANGEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes); -typedef void (APIENTRYP PFNGLBINDTEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); -typedef void (APIENTRYP PFNGLBINDSAMPLERSPROC) (GLuint first, GLsizei count, const GLuint *samplers); -typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); -typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); -#endif /* GL_VERSION_4_4 */ - -// Note: trim it -#ifndef GL_VERSION_4_5 -#define GL_VERSION_4_5 1 -#define GL_CONTEXT_LOST 0x0507 -#define GL_NEGATIVE_ONE_TO_ONE 0x935E -#define GL_ZERO_TO_ONE 0x935F -#define GL_CLIP_ORIGIN 0x935C -#define GL_CLIP_DEPTH_MODE 0x935D -#define GL_QUERY_WAIT_INVERTED 0x8E17 -#define GL_QUERY_NO_WAIT_INVERTED 0x8E18 -#define GL_QUERY_BY_REGION_WAIT_INVERTED 0x8E19 -#define 
GL_QUERY_BY_REGION_NO_WAIT_INVERTED 0x8E1A -#define GL_MAX_CULL_DISTANCES 0x82F9 -#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES 0x82FA -#define GL_TEXTURE_TARGET 0x1006 -#define GL_QUERY_TARGET 0x82EA -#define GL_TEXTURE_BINDING 0x82EB -#define GL_GUILTY_CONTEXT_RESET 0x8253 -#define GL_INNOCENT_CONTEXT_RESET 0x8254 -#define GL_UNKNOWN_CONTEXT_RESET 0x8255 -#define GL_RESET_NOTIFICATION_STRATEGY 0x8256 -#define GL_LOSE_CONTEXT_ON_RESET 0x8252 -#define GL_NO_RESET_NOTIFICATION 0x8261 -#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004 -#define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB -#define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC -typedef void (APIENTRYP PFNGLCLIPCONTROLPROC) (GLenum origin, GLenum depth); -typedef void (APIENTRYP PFNGLCREATEBUFFERSPROC) (GLsizei n, GLuint *buffers); -typedef void (APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC) (GLuint buffer, GLsizei size, const void *data, GLbitfield flags); -typedef void (APIENTRYP PFNGLNAMEDBUFFERDATAPROC) (GLuint buffer, GLsizei size, const void *data, GLenum usage); -typedef void (APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizei size, const void *data); -typedef void (APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC) (GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizei size); -typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC) (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLenum internalformat, GLintptr offset, GLsizei size, GLenum format, GLenum type, const void *data); -typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERPROC) (GLuint buffer, GLenum access); -typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizei length, GLbitfield access); -typedef GLboolean (APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC) (GLuint buffer); -typedef void (APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr 
offset, GLsizei length); -typedef void (APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC) (GLuint framebuffer, GLenum pname, GLint param); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC) (GLuint framebuffer, GLenum buf); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC) (GLuint framebuffer, GLsizei n, const GLenum *bufs); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC) (GLuint framebuffer, GLenum src); -typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments); -typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC) (GLuint framebuffer, GLenum buffer, const GLfloat depth, GLint stencil); -typedef void (APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC) (GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, 
GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); -typedef GLenum (APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC) (GLuint framebuffer, GLenum target); -typedef void (APIENTRYP PFNGLCREATERENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers); -typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLCREATETEXTURESPROC) (GLenum target, GLsizei n, GLuint *textures); -typedef void (APIENTRYP PFNGLTEXTUREBUFFERPROC) (GLuint texture, GLenum internalformat, GLuint buffer); -typedef void (APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC) (GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizei size); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); -typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); -typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFPROC) (GLuint texture, GLenum pname, GLfloat param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, const GLfloat *param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIPROC) (GLuint texture, GLenum pname, GLint param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, const GLint *params); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, const GLuint *params); -typedef void (APIENTRYP 
PFNGLTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, const GLint *param); -typedef void (APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC) (GLuint texture); -typedef void (APIENTRYP PFNGLBINDTEXTUREUNITPROC) (GLuint unit, GLuint texture); -typedef void (APIENTRYP PFNGLCREATEVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays); -typedef void (APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); -typedef void (APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); -typedef void (APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC) (GLuint vaobj, GLuint buffer); -typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC) (GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); -typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC) (GLuint vaobj, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYIVPROC) (GLuint vaobj, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint64 *param); -typedef void (APIENTRYP PFNGLCREATESAMPLERSPROC) (GLsizei n, GLuint 
*samplers); -typedef void (APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC) (GLsizei n, GLuint *pipelines); -typedef void (APIENTRYP PFNGLCREATEQUERIESPROC) (GLenum target, GLsizei n, GLuint *ids); -typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC) (GLbitfield barriers); -typedef void (APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels); -typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC) (void); -typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint lod, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETNTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETNUNIFORMDVPROC) (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC) (GLuint program, GLint location, GLsizei bufSize, GLfloat *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint *params); -typedef void (APIENTRYP PFNGLREADNPIXELSPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); -typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void); -typedef void (APIENTRYP PFNGLGETTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -#endif /* GL_VERSION_4_5 */ - -// Note: glActiveTexture & glBlendColor aren't included in the win GL ABI. 
-// (maybe gl.h is outdated, or my setup is wrong) -// Anyway, let's just keep the mangled function pointer for those 2 functions. -extern PFNGLACTIVETEXTUREPROC gl_ActiveTexture; -extern PFNGLBLENDCOLORPROC gl_BlendColor; - -extern PFNGLATTACHSHADERPROC glAttachShader; -extern PFNGLBINDBUFFERPROC glBindBuffer; -extern PFNGLBINDBUFFERBASEPROC glBindBufferBase; -extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange; -extern PFNGLBINDFRAMEBUFFERPROC glBindFramebuffer; -extern PFNGLBINDSAMPLERPROC glBindSampler; -extern PFNGLBINDVERTEXARRAYPROC glBindVertexArray; -extern PFNGLBLENDEQUATIONSEPARATEIARBPROC glBlendEquationSeparateiARB; -extern PFNGLBLENDFUNCSEPARATEIARBPROC glBlendFuncSeparateiARB; -extern PFNGLBLITFRAMEBUFFERPROC glBlitFramebuffer; -extern PFNGLBUFFERDATAPROC glBufferData; -extern PFNGLCHECKFRAMEBUFFERSTATUSPROC glCheckFramebufferStatus; -extern PFNGLCLEARBUFFERFVPROC glClearBufferfv; -extern PFNGLCLEARBUFFERIVPROC glClearBufferiv; -extern PFNGLCLEARBUFFERUIVPROC glClearBufferuiv; -extern PFNGLCOMPILESHADERPROC glCompileShader; -extern PFNGLCOLORMASKIPROC glColorMaski; -extern PFNGLCREATEPROGRAMPROC glCreateProgram; -extern PFNGLCREATESHADERPROC glCreateShader; -extern PFNGLCREATESHADERPROGRAMVPROC glCreateShaderProgramv; -extern PFNGLDELETEBUFFERSPROC glDeleteBuffers; -extern PFNGLDELETEFRAMEBUFFERSPROC glDeleteFramebuffers; -extern PFNGLDELETEPROGRAMPROC glDeleteProgram; -extern PFNGLDELETESAMPLERSPROC glDeleteSamplers; -extern PFNGLDELETESHADERPROC glDeleteShader; -extern PFNGLDELETEVERTEXARRAYSPROC glDeleteVertexArrays; -extern PFNGLDETACHSHADERPROC glDetachShader; -extern PFNGLDRAWBUFFERSPROC glDrawBuffers; -extern PFNGLDRAWELEMENTSBASEVERTEXPROC glDrawElementsBaseVertex; -extern PFNGLENABLEVERTEXATTRIBARRAYPROC glEnableVertexAttribArray; -extern PFNGLFRAMEBUFFERRENDERBUFFERPROC glFramebufferRenderbuffer; -extern PFNGLFRAMEBUFFERTEXTURE2DPROC glFramebufferTexture2D; -extern PFNGLGENBUFFERSPROC glGenBuffers; -extern PFNGLGENFRAMEBUFFERSPROC 
glGenFramebuffers; -extern PFNGLGENSAMPLERSPROC glGenSamplers; -extern PFNGLGENVERTEXARRAYSPROC glGenVertexArrays; -extern PFNGLGETBUFFERPARAMETERIVPROC glGetBufferParameteriv; -extern PFNGLGETDEBUGMESSAGELOGARBPROC glGetDebugMessageLogARB; -extern PFNGLDEBUGMESSAGECALLBACKPROC glDebugMessageCallback; -extern PFNGLGETPROGRAMINFOLOGPROC glGetProgramInfoLog; -extern PFNGLGETPROGRAMIVPROC glGetProgramiv; -extern PFNGLGETSHADERIVPROC glGetShaderiv; -extern PFNGLGETSTRINGIPROC glGetStringi; -extern PFNGLISFRAMEBUFFERPROC glIsFramebuffer; -extern PFNGLLINKPROGRAMPROC glLinkProgram; -extern PFNGLMAPBUFFERPROC glMapBuffer; -extern PFNGLMAPBUFFERRANGEPROC glMapBufferRange; -extern PFNGLPROGRAMPARAMETERIPROC glProgramParameteri; -extern PFNGLSAMPLERPARAMETERFPROC glSamplerParameterf; -extern PFNGLSAMPLERPARAMETERIPROC glSamplerParameteri; -extern PFNGLSHADERSOURCEPROC glShaderSource; -extern PFNGLUNIFORM1IPROC glUniform1i; -extern PFNGLUNMAPBUFFERPROC glUnmapBuffer; -extern PFNGLUSEPROGRAMSTAGESPROC glUseProgramStages; -extern PFNGLVERTEXATTRIBIPOINTERPROC glVertexAttribIPointer; -extern PFNGLVERTEXATTRIBPOINTERPROC glVertexAttribPointer; -extern PFNGLBUFFERSUBDATAPROC glBufferSubData; -extern PFNGLFENCESYNCPROC glFenceSync; -extern PFNGLDELETESYNCPROC glDeleteSync; -extern PFNGLCLIENTWAITSYNCPROC glClientWaitSync; -extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC glFlushMappedBufferRange; -extern PFNGLBLENDEQUATIONSEPARATEPROC glBlendEquationSeparate; -extern PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate; -// Query object -extern PFNGLBEGINQUERYPROC glBeginQuery; -extern PFNGLENDQUERYPROC glEndQuery; -extern PFNGLGETQUERYIVPROC glGetQueryiv; -extern PFNGLGETQUERYOBJECTIVPROC glGetQueryObjectiv; -extern PFNGLGETQUERYOBJECTUIVPROC glGetQueryObjectuiv; -extern PFNGLQUERYCOUNTERPROC glQueryCounter; -extern PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v; -extern PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v; -extern PFNGLGETINTEGER64VPROC glGetInteger64v; -// GL4.0 -// GL4.1 
-extern PFNGLBINDPROGRAMPIPELINEPROC glBindProgramPipeline; -extern PFNGLDELETEPROGRAMPIPELINESPROC glDeleteProgramPipelines; -extern PFNGLGENPROGRAMPIPELINESPROC glGenProgramPipelines; -extern PFNGLGETPROGRAMPIPELINEIVPROC glGetProgramPipelineiv; -extern PFNGLVALIDATEPROGRAMPIPELINEPROC glValidateProgramPipeline; -extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC glGetProgramPipelineInfoLog; -extern PFNGLGETPROGRAMBINARYPROC glGetProgramBinary; -extern PFNGLVIEWPORTINDEXEDFPROC glViewportIndexedf; -extern PFNGLVIEWPORTINDEXEDFVPROC glViewportIndexedfv; -extern PFNGLSCISSORINDEXEDPROC glScissorIndexed; -extern PFNGLSCISSORINDEXEDVPROC glScissorIndexedv; -// NO GL4.1 -extern PFNGLUSEPROGRAMPROC glUseProgram; -extern PFNGLGETSHADERINFOLOGPROC glGetShaderInfoLog; -extern PFNGLPROGRAMUNIFORM1IPROC glProgramUniform1i; -// GL4.2 -extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture; -extern PFNGLMEMORYBARRIERPROC glMemoryBarrier; -extern PFNGLTEXSTORAGE2DPROC glTexStorage2D; -extern PFNGLPOPDEBUGGROUPPROC glPopDebugGroup; -// GL4.3 -extern PFNGLCOPYIMAGESUBDATAPROC glCopyImageSubData; -extern PFNGLINVALIDATETEXIMAGEPROC glInvalidateTexImage; -extern PFNGLPUSHDEBUGGROUPPROC glPushDebugGroup; -extern PFNGLDEBUGMESSAGEINSERTPROC glDebugMessageInsert; -extern PFNGLDEBUGMESSAGECONTROLPROC glDebugMessageControl; -// GL4.4 -extern PFNGLCLEARTEXIMAGEPROC glClearTexImage; -extern PFNGLBUFFERSTORAGEPROC glBufferStorage; - -// GL4.5 -extern PFNGLCREATETEXTURESPROC glCreateTextures; -extern PFNGLTEXTURESTORAGE2DPROC glTextureStorage2D; -extern PFNGLTEXTURESUBIMAGE2DPROC glTextureSubImage2D; -extern PFNGLCOPYTEXTURESUBIMAGE2DPROC glCopyTextureSubImage2D; -extern PFNGLBINDTEXTUREUNITPROC glBindTextureUnit; -extern PFNGLGETTEXTUREIMAGEPROC glGetTextureImage; -extern PFNGLTEXTUREPARAMETERIPROC glTextureParameteri; - -extern PFNGLCREATEFRAMEBUFFERSPROC glCreateFramebuffers; -extern PFNGLCLEARNAMEDFRAMEBUFFERFVPROC glClearNamedFramebufferfv; -extern PFNGLCLEARNAMEDFRAMEBUFFERIVPROC 
glClearNamedFramebufferiv; -extern PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC glClearNamedFramebufferuiv; -extern PFNGLNAMEDFRAMEBUFFERTEXTUREPROC glNamedFramebufferTexture; -extern PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC glNamedFramebufferDrawBuffers; -extern PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC glNamedFramebufferReadBuffer; -extern PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC glCheckNamedFramebufferStatus; - -extern PFNGLCREATEBUFFERSPROC glCreateBuffers; -extern PFNGLNAMEDBUFFERSTORAGEPROC glNamedBufferStorage; -extern PFNGLNAMEDBUFFERDATAPROC glNamedBufferData; -extern PFNGLNAMEDBUFFERSUBDATAPROC glNamedBufferSubData; -extern PFNGLMAPNAMEDBUFFERPROC glMapNamedBuffer; -extern PFNGLMAPNAMEDBUFFERRANGEPROC glMapNamedBufferRange; -extern PFNGLUNMAPNAMEDBUFFERPROC glUnmapNamedBuffer; -extern PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC glFlushMappedNamedBufferRange; - -extern PFNGLCREATESAMPLERSPROC glCreateSamplers; -extern PFNGLCREATEPROGRAMPIPELINESPROC glCreateProgramPipelines; - -extern PFNGLCLIPCONTROLPROC glClipControl; -extern PFNGLTEXTUREBARRIERPROC glTextureBarrier; - -namespace Emulate_DSA { - extern void SetFramebufferTarget(GLenum target); - extern void SetBufferTarget(GLenum target); - extern void Init(); -} - -namespace GLLoader { - bool check_gl_version(int major, int minor); - void init_gl_function(); - bool check_gl_supported_extension(); - - extern bool fglrx_buggy_driver; - extern bool mesa_amd_buggy_driver; - extern bool nvidia_buggy_driver; - extern bool intel_buggy_driver; - extern bool in_replayer; - - // GL - extern bool found_GL_ARB_separate_shader_objects; - extern bool found_GL_ARB_copy_image; - extern bool found_geometry_shader; - extern bool found_GL_ARB_gpu_shader5; - extern bool found_GL_ARB_shader_image_load_store; - extern bool found_GL_ARB_clear_texture; - extern bool found_GL_ARB_buffer_storage; - extern bool found_GL_ARB_clip_control; - extern bool found_GL_ARB_direct_state_access; - extern bool found_GL_ARB_texture_barrier; - extern bool 
found_GL_EXT_texture_filter_anisotropic; -} diff --git a/plugins/GSdx_legacy/GLState.cpp b/plugins/GSdx_legacy/GLState.cpp deleted file mode 100644 index 1ecdb4a41f..0000000000 --- a/plugins/GSdx_legacy/GLState.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GLState.h" - -namespace GLState { - GLuint fbo; - GSVector2i viewport; - GSVector4i scissor; - - bool blend; - uint16 eq_RGB; - uint16 f_sRGB; - uint16 f_dRGB; - uint8 bf; - uint32 wrgba; - - bool depth; - GLenum depth_func; - bool depth_mask; - - bool stencil; - GLenum stencil_func; - GLenum stencil_pass; - - GLuint ubo; - - GLuint ps_ss; - - GLuint rt; - GLuint ds; - GLuint tex_unit[4]; - GLuint64 tex_handle[4]; - - GLuint ps; - GLuint gs; - GLuint vs; - GLuint program; - bool dirty_prog; - - void Clear() { - fbo = 0; - viewport = GSVector2i(0, 0); - scissor = GSVector4i(0, 0, 0, 0); - - blend = false; - eq_RGB = 0; - f_sRGB = 0; - f_dRGB = 0; - bf = 0; - wrgba = 0xF; - - depth = false; - depth_func = 0; - depth_mask = false; - - stencil = false; - stencil_func = 0; - stencil_pass = 0; - - ubo = 0; - - ps_ss = 0; - - rt = 0; - ds = 0; - for (size_t i = 0; i < countof(tex_unit); i++) - tex_unit[i] = 0; - for (size_t i = 0; i < countof(tex_handle); i++) - tex_handle[i] = 0; - - ps = 0; - gs = 0; - vs = 0; - program = 0; - dirty_prog = true; - } -} diff --git a/plugins/GSdx_legacy/GLState.h b/plugins/GSdx_legacy/GLState.h deleted file mode 100644 index 18931d6cd2..0000000000 --- a/plugins/GSdx_legacy/GLState.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSdx.h" -#include "GSVector.h" - -namespace GLState { - extern GLuint fbo; // frame buffer object - extern GSVector2i viewport; - extern GSVector4i scissor; - - extern bool blend; - extern uint16 eq_RGB; - extern uint16 f_sRGB; - extern uint16 f_dRGB; - extern uint8 bf; - extern uint32 wrgba; - - extern bool depth; - extern GLenum depth_func; - extern bool depth_mask; - - extern bool stencil; - extern GLenum stencil_func; - extern GLenum stencil_pass; - - extern GLuint ubo; // uniform buffer object - - extern GLuint ps_ss; // sampler - - extern GLuint rt; // render target - extern GLuint ds; // Depth-Stencil - extern GLuint tex_unit[4]; // shader input texture - extern GLuint64 tex_handle[4]; // shader input texture - - extern GLuint ps; - extern GLuint gs; - extern GLuint vs; - extern GLuint program; // monolith program (when sso isn't supported) - extern bool dirty_prog; - - extern void Clear(); -} diff --git a/plugins/GSdx_legacy/GPU.cpp b/plugins/GSdx_legacy/GPU.cpp deleted file mode 100644 index 2c67f8a605..0000000000 --- a/plugins/GSdx_legacy/GPU.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSUtil.h" -#include "GPURendererSW.h" -#include "GSDeviceNull.h" - -#ifdef _WIN32 - -#include "GPUSettingsDlg.h" -#include "GSDevice9.h" -#include "GSDevice11.h" - -static HRESULT s_hr = E_FAIL; - -#endif - -#define PSE_LT_GPU 2 - -static GPURenderer* s_gpu = NULL; - -EXPORT_C_(uint32) PSEgetLibType() -{ - return PSE_LT_GPU; -} - -EXPORT_C_(const char*) PSEgetLibName() -{ - return GSUtil::GetLibName(); -} - -EXPORT_C_(uint32) PSEgetLibVersion() -{ - static const uint32 version = 1; - static const uint32 revision = 1; - - return version << 16 | revision << 8 | PLUGIN_VERSION; -} - -EXPORT_C_(int32) GPUinit() -{ - return 0; -} - -EXPORT_C_(int32) GPUshutdown() -{ - return 0; -} - -EXPORT_C_(int32) GPUclose() -{ - delete s_gpu; - - s_gpu = NULL; - -#ifdef _WIN32 - GSDeviceDX::FreeD3DCompiler(); - - if(SUCCEEDED(s_hr)) - { - ::CoUninitialize(); - - s_hr = E_FAIL; - } - -#endif - - return 0; -} - -EXPORT_C_(int32) GPUopen(void* hWnd) -{ - GPUclose(); - - if(!GSUtil::CheckSSE()) - { - return -1; - } - -#ifdef _WIN32 - - s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); - - if(!GSUtil::CheckDirectX()) - { - return -1; - } - - if (!GSDeviceDX::LoadD3DCompiler()) - return -1; -#endif - - int renderer = theApp.GetConfig("Renderer", 1); - int threads = theApp.GetConfig("extrathreads", DEFAULT_EXTRA_RENDERING_THREADS); - - switch(renderer) - { - default: - #ifdef _WIN32 - case 0: s_gpu = new GPURendererSW(new GSDevice9(), threads); break; - case 1: s_gpu = new GPURendererSW(new GSDevice11(), threads); break; - #endif - case 3: s_gpu = new GPURendererSW(new GSDeviceNull(), threads); break; - //case 4: s_gpu = new GPURendererNull(new 
GSDeviceNull()); break; - } - - if(!s_gpu->Create(hWnd)) - { - GPUclose(); - - return -1; - } - - return 0; -} - -EXPORT_C_(int32) GPUconfigure() -{ -#ifdef _WIN32 - - GPUSettingsDlg dlg; - - if(IDOK == dlg.DoModal()) - { - GPUshutdown(); - GPUinit(); - } - -#else - - // TODO: linux -#endif - - return 0; -} - -EXPORT_C_(int32) GPUtest() -{ - return 0; -} - -EXPORT_C GPUabout() -{ - // TODO -} - -EXPORT_C GPUwriteDataMem(const uint8* mem, uint32 size) -{ - s_gpu->WriteData(mem, size); -} - -EXPORT_C GPUwriteData(uint32 data) -{ - s_gpu->WriteData((uint8*)&data, 1); -} - -EXPORT_C GPUreadDataMem(uint8* mem, uint32 size) -{ - s_gpu->ReadData(mem, size); -} - -EXPORT_C_(uint32) GPUreadData() -{ - uint32 data = 0; - - s_gpu->ReadData((uint8*)&data, 1); - - return data; -} - -EXPORT_C GPUwriteStatus(uint32 status) -{ - s_gpu->WriteStatus(status); -} - -EXPORT_C_(uint32) GPUreadStatus() -{ - return s_gpu->ReadStatus(); -} - -EXPORT_C_(uint32) GPUdmaChain(const uint8* mem, uint32 addr) -{ - uint32 last[3]; - - memset(last, 0xff, sizeof(last)); - - do - { - if(addr == last[1] || addr == last[2]) - { - break; - } - - (addr < last[0] ? 
last[1] : last[2]) = addr; - - last[0] = addr; - - uint8 size = mem[addr + 3]; - - if(size > 0) - { - s_gpu->WriteData(&mem[addr + 4], size); - } - - addr = *(uint32*)&mem[addr] & 0xffffff; - } - while(addr != 0xffffff); - - return 0; -} - -EXPORT_C_(uint32) GPUgetMode() -{ - // TODO - - return 0; -} - -EXPORT_C GPUsetMode(uint32 mode) -{ - // TODO -} - -EXPORT_C GPUupdateLace() -{ - s_gpu->VSync(); -} - -EXPORT_C GPUmakeSnapshot() -{ - s_gpu->MakeSnapshot("c:/"); // TODO -} - -EXPORT_C GPUdisplayText(char* text) -{ - // TODO -} - -EXPORT_C GPUdisplayFlags(uint32 flags) -{ - // TODO -} - -EXPORT_C_(int32) GPUfreeze(uint32 type, GPUFreezeData* data) -{ - if(!data || data->version != 1) - { - return 0; - } - - if(type == 0) - { - s_gpu->Defrost(data); - - return 1; - } - else if(type == 1) - { - s_gpu->Freeze(data); - - return 1; - } - else if(type == 2) - { - int slot = *(int*)data + 1; - - if(slot < 1 || slot > 9) - { - return 0; - } - - // TODO - - return 1; - } - - return 0; -} - -EXPORT_C GPUgetScreenPic(uint8* mem) -{ - // TODO -} - -EXPORT_C GPUshowScreenPic(uint8* mem) -{ - // TODO -} - -EXPORT_C GPUcursor(int player, int x, int y) -{ - // TODO -} diff --git a/plugins/GSdx_legacy/GPU.h b/plugins/GSdx_legacy/GPU.h deleted file mode 100644 index 18342ea3d6..0000000000 --- a/plugins/GSdx_legacy/GPU.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#pragma pack(push, 1) - -#include "GS.h" - -enum -{ - GPU_POLYGON = 1, - GPU_LINE = 2, - GPU_SPRITE = 3, -}; - -REG32_(GPUReg, STATUS) - uint32 TX:4; - uint32 TY:1; - uint32 ABR:2; - uint32 TP:2; - uint32 DTD:1; - uint32 DFE:1; - uint32 MD:1; - uint32 ME:1; - uint32 _PAD0:3; - uint32 WIDTH1:1; - uint32 WIDTH0:2; - uint32 HEIGHT:1; - uint32 ISPAL:1; - uint32 ISRGB24:1; - uint32 ISINTER:1; - uint32 DEN:1; - uint32 _PAD1:2; - uint32 IDLE:1; - uint32 IMG:1; - uint32 COM:1; - uint32 DMA:2; - uint32 LCF:1; - /* - uint32 TX:4; - uint32 TY:1; - uint32 ABR:2; - uint32 TP:2; - uint32 DTD:1; - uint32 DFE:1; - uint32 PBW:1; - uint32 PBC:1; - uint32 _PAD0:3; - uint32 HRES2:1; - uint32 HRES1:2; - uint32 VRES:1; - uint32 ISPAL:1; - uint32 ISRGB24:1; - uint32 ISINTER:1; - uint32 ISSTOP:1; - uint32 _PAD1:1; - uint32 DMARDY:1; - uint32 IDIDLE:1; - uint32 DATARDY:1; - uint32 ISEMPTY:1; - uint32 TMODE:2; - uint32 ODE:1; - */ -REG_END - -REG32_(GPUReg, PACKET) - uint32 _PAD:24; - uint32 OPTION:5; - uint32 TYPE:3; -REG_END - -REG32_(GPUReg, PRIM) - uint32 VTX:24; - uint32 TGE:1; - uint32 ABE:1; - uint32 TME:1; - uint32 _PAD2:1; - uint32 IIP:1; - uint32 TYPE:3; -REG_END - -REG32_(GPUReg, POLYGON) - uint32 _PAD:24; - uint32 TGE:1; - uint32 ABE:1; - uint32 TME:1; - uint32 VTX:1; - uint32 IIP:1; - uint32 TYPE:3; -REG_END - -REG32_(GPUReg, LINE) - uint32 _PAD:24; - uint32 ZERO1:1; - uint32 ABE:1; - uint32 ZERO2:1; - uint32 PLL:1; - uint32 IIP:1; - uint32 TYPE:3; -REG_END - -REG32_(GPUReg, SPRITE) - uint32 _PAD:24; - uint32 ZERO:1; - uint32 ABE:1; - uint32 TME:1; - uint32 SIZE:2; - uint32 TYPE:3; -REG_END - -REG32_(GPUReg, RESET) - uint32 _PAD:32; -REG_END - -REG32_(GPUReg, DEN) - 
uint32 DEN:1; - uint32 _PAD:31; -REG_END - -REG32_(GPUReg, DMA) - uint32 DMA:2; - uint32 _PAD:30; -REG_END - -REG32_(GPUReg, DAREA) - uint32 X:10; - uint32 Y:9; - uint32 _PAD:13; -REG_END - -REG32_(GPUReg, DHRANGE) - uint32 X1:12; - uint32 X2:12; - uint32 _PAD:8; -REG_END - -REG32_(GPUReg, DVRANGE) - uint32 Y1:10; - uint32 Y2:11; - uint32 _PAD:11; -REG_END - -REG32_(GPUReg, DMODE) - uint32 WIDTH0:2; - uint32 HEIGHT:1; - uint32 ISPAL:1; - uint32 ISRGB24:1; - uint32 ISINTER:1; - uint32 WIDTH1:1; - uint32 REVERSE:1; - uint32 _PAD:24; -REG_END - -REG32_(GPUReg, GPUINFO) - uint32 PARAM:24; - uint32 _PAD:8; -REG_END - -REG32_(GPUReg, MODE) - uint32 TX:4; - uint32 TY:1; - uint32 ABR:2; - uint32 TP:2; - uint32 DTD:1; - uint32 DFE:1; - uint32 _PAD:21; -REG_END - -REG32_(GPUReg, MASK) - uint32 MD:1; - uint32 ME:1; - uint32 _PAD:30; -REG_END - -REG32_(GPUReg, DRAREA) - uint32 X:10; - uint32 Y:10; - uint32 _PAD:12; -REG_END - -REG32_(GPUReg, DROFF) - int32 X:11; - int32 Y:11; - int32 _PAD:10; -REG_END - -REG32_(GPUReg, RGB) - uint32 R:8; - uint32 G:8; - uint32 B:8; - uint32 _PAD:8; -REG_END - -REG32_(GPUReg, XY) - int32 X:11; - int32 _PAD1:5; - int32 Y:11; - int32 _PAD2:5; -REG_END - -REG32_(GPUReg, UV) - uint32 U:8; - uint32 V:8; - uint32 _PAD:16; -REG_END - -REG32_(GPUReg, TWIN) - uint32 TWW:5; - uint32 TWH:5; - uint32 TWX:5; - uint32 TWY:5; - uint32 _PAD:12; -REG_END - -REG32_(GPUReg, CLUT) - uint32 _PAD1:16; - uint32 X:6; - uint32 Y:9; - uint32 _PAD2:1; -REG_END - -REG32_SET(GPUReg) - GPURegSTATUS STATUS; - GPURegPACKET PACKET; - GPURegPRIM PRIM; - GPURegPOLYGON POLYGON; - GPURegLINE LINE; - GPURegSPRITE SPRITE; - GPURegRESET RESET; - GPURegDEN DEN; - GPURegDMA DMA; - GPURegDAREA DAREA; - GPURegDHRANGE DHRANGE; - GPURegDVRANGE DVRANGE; - GPURegDMODE DMODE; - GPURegGPUINFO GPUINFO; - GPURegMODE MODE; - GPURegMASK MASK; - GPURegDRAREA DRAREA; - GPURegDROFF DROFF; - GPURegRGB RGB; - GPURegXY XY; - GPURegUV UV; - GPURegTWIN TWIN; - GPURegCLUT CLUT; -REG_SET_END - -struct 
GPUFreezeData -{ - uint32 version; // == 1 - uint32 status; - uint32 control[256]; - uint16 vram[1024 * 1024]; -}; - -#pragma pack(pop) - diff --git a/plugins/GSdx_legacy/GPUDrawScanline.cpp b/plugins/GSdx_legacy/GPUDrawScanline.cpp deleted file mode 100644 index 4159fd9d93..0000000000 --- a/plugins/GSdx_legacy/GPUDrawScanline.cpp +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GPUDrawScanline.h" - -GPUDrawScanline::GPUDrawScanline() - : m_sp_map("GPUSetupPrim", &m_local) - , m_ds_map("GPUDrawScanline", &m_local) -{ - memset(&m_local, 0, sizeof(m_local)); - - m_local.gd = &m_global; -} - -GPUDrawScanline::~GPUDrawScanline() -{ -} - -void GPUDrawScanline::BeginDraw(const GSRasterizerData* data) -{ - memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global)); - - if(m_global.sel.tme && m_global.sel.twin) - { - uint32 u, v; - - u = ~(m_global.twin.x << 3) & 0xff; // TWW - v = ~(m_global.twin.y << 3) & 0xff; // TWH - - m_local.twin[0].u = GSVector4i((u << 16) | u); - m_local.twin[0].v = GSVector4i((v << 16) | v); - - u = m_global.twin.z << 3; // TWX - v = m_global.twin.w << 3; // TWY - - m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u; - m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v; - } - - m_ds = m_ds_map[m_global.sel]; - - m_de = NULL; - - m_dr = NULL; // TODO - - // doesn't need all bits => less functions generated - - GPUScanlineSelector sel; - - sel.key = 0; - - sel.iip = m_global.sel.iip; - sel.tfx = m_global.sel.tfx; - sel.twin = m_global.sel.twin; - sel.sprite = m_global.sel.sprite; - - m_sp = m_sp_map[sel]; -} - -void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total) -{ - m_ds_map.UpdateStats(frame, ticks, actual, total); -} - -#ifndef ENABLE_JIT_RASTERIZER - -void GPUDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) -{ - GPUScanlineSelector sel = m_global.sel; - - const GSVector4* shift = GPUSetupPrimCodeGenerator::m_shift; - - if(sel.tme && !sel.twin) - { - if(sel.sprite) - { - GSVector4i t = (GSVector4i(vertex[index[1]].t) >> 8) - GSVector4i::x00000001(); - - t = t.ps32(t); - t = t.upl16(t); - - m_local.twin[2].u = t.xxxx(); - m_local.twin[2].v = t.yyyy(); - } - else - { - // TODO: not really needed - - m_local.twin[2].u = 
GSVector4i::x00ff(); - m_local.twin[2].v = GSVector4i::x00ff(); - } - } - - if(sel.tme || sel.iip && sel.tfx != 3) - { - GSVector4 dt = dscan.t; - GSVector4 dc = dscan.c; - - GSVector4i dtc8 = GSVector4i(dt * shift[0]).ps32(GSVector4i(dc * shift[0])); - - if(sel.tme) - { - m_local.d8.st = dtc8.upl16(dtc8); - } - - if(sel.iip && sel.tfx != 3) - { - m_local.d8.c = dtc8.uph16(dtc8); - } - - if(sel.tme) - { - GSVector4 dtx = dt.xxxx(); - GSVector4 dty = dt.yyyy(); - - m_local.d.s = GSVector4i(dtx * shift[1]).ps32(GSVector4i(dtx * shift[2])); - m_local.d.t = GSVector4i(dty * shift[1]).ps32(GSVector4i(dty * shift[2])); - } - - if(sel.iip && sel.tfx != 3) - { - GSVector4 dcx = dc.xxxx(); - GSVector4 dcy = dc.yyyy(); - GSVector4 dcz = dc.zzzz(); - - m_local.d.r = GSVector4i(dcx * shift[1]).ps32(GSVector4i(dcx * shift[2])); - m_local.d.g = GSVector4i(dcy * shift[1]).ps32(GSVector4i(dcy * shift[2])); - m_local.d.b = GSVector4i(dcz * shift[1]).ps32(GSVector4i(dcz * shift[2])); - } - } -} - -void GPUDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) -{ - // TODO: not tested yet, probably bogus - - GPUScanlineSelector sel = m_global.sel; - - GSVector4i s, t; - GSVector4i uf, vf; - GSVector4i rf, gf, bf; - GSVector4i dither; - - // Init - - uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left; - - int steps = pixels - 8; - - if(sel.dtd) - { - dither = GSVector4i::load(&GPUDrawScanlineCodeGenerator::m_dither[top & 3][left & 3]); - } - - if(sel.tme) - { - GSVector4i vt = GSVector4i(scan.t).xxzzl(); - - s = vt.xxxx().add16(m_local.d.s); - t = vt.yyyy(); - - if(!sel.sprite) - { - t = t.add16(m_local.d.t); - } - else - { - if(sel.ltf) - { - vf = t.sll16(1).srl16(1); - } - } - } - - if(sel.tfx != 3) - { - GSVector4i vc = GSVector4i(scan.c).xxzzlh(); - - rf = vc.xxxx(); - gf = vc.yyyy(); - bf = vc.zzzz(); - - if(sel.iip) - { - rf = rf.add16(m_local.d.r); - gf = gf.add16(m_local.d.g); - bf = bf.add16(m_local.d.b); - } - } - - while(1) - { 
- do - { - GSVector4i test = GPUDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; - - GSVector4i fd = GSVector4i::load(fb, fb + 8); - - GSVector4i r, g, b, a; - - // TestMask - - if(sel.me) - { - test |= fd.sra16(15); - - if(test.alltrue()) continue; - } - - // SampleTexture - - if(sel.tme) - { - GSVector4i u0, v0, u1, v1; - GSVector4i addr00, addr01, addr10, addr11; - GSVector4i c00, c01, c10, c11; - - if(sel.ltf) - { - u0 = s.sub16(GSVector4i(0x00200020)); // - 0.125f - v0 = t.sub16(GSVector4i(0x00200020)); // - 0.125f - - uf = u0.sll16(8).srl16(1); - vf = v0.sll16(8).srl16(1);; - } - else - { - u0 = s; - v0 = t; - } - - u0 = u0.srl16(8); - v0 = v0.srl16(8); - - if(sel.ltf) - { - u1 = u0.add16(GSVector4i::x0001()); - v1 = v0.add16(GSVector4i::x0001()); - - if(sel.twin) - { - u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u); - v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v); - u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u); - v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v); - } - else - { - u0 = u0.min_i16(m_local.twin[2].u); - v0 = v0.min_i16(m_local.twin[2].v); - u1 = u1.min_i16(m_local.twin[2].u); - v1 = v1.min_i16(m_local.twin[2].v); - } - - addr00 = v0.sll16(8) | u0; - addr01 = v0.sll16(8) | u1; - addr10 = v1.sll16(8) | u0; - addr11 = v1.sll16(8) | u1; - - // TODO - - if(sel.tlu) - { - c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut); - c01 = addr01.gather16_16((const uint16*)m_global.vm, m_global.clut); - c10 = addr10.gather16_16((const uint16*)m_global.vm, m_global.clut); - c11 = addr11.gather16_16((const uint16*)m_global.vm, m_global.clut); - } - else - { - c00 = addr00.gather16_16((const uint16*)m_global.vm); - c01 = addr01.gather16_16((const uint16*)m_global.vm); - c10 = addr10.gather16_16((const uint16*)m_global.vm); - c11 = addr11.gather16_16((const uint16*)m_global.vm); - } - - GSVector4i r00 = c00.sll16(11).srl16(8); - GSVector4i r01 = c01.sll16(11).srl16(8); - GSVector4i r10 = 
c10.sll16(11).srl16(8); - GSVector4i r11 = c11.sll16(11).srl16(8); - - r00 = r00.lerp16<0>(r01, uf); - r10 = r10.lerp16<0>(r11, uf); - - GSVector4i g00 = c00.sll16(6).srl16(11).sll16(3); - GSVector4i g01 = c01.sll16(6).srl16(11).sll16(3); - GSVector4i g10 = c10.sll16(6).srl16(11).sll16(3); - GSVector4i g11 = c11.sll16(6).srl16(11).sll16(3); - - g00 = g00.lerp16<0>(g01, uf); - g10 = g10.lerp16<0>(g11, uf); - - GSVector4i b00 = c00.sll16(1).srl16(11).sll16(3); - GSVector4i b01 = c01.sll16(1).srl16(11).sll16(3); - GSVector4i b10 = c10.sll16(1).srl16(11).sll16(3); - GSVector4i b11 = c11.sll16(1).srl16(11).sll16(3); - - b00 = b00.lerp16<0>(b01, uf); - b10 = b10.lerp16<0>(b11, uf); - - GSVector4i a00 = c00.sra16(15).sll16(8); - GSVector4i a01 = c01.sra16(15).sll16(8); - GSVector4i a10 = c10.sra16(15).sll16(8); - GSVector4i a11 = c11.sra16(15).sll16(8); - - a00 = a00.lerp16<0>(a01, uf); - a10 = a10.lerp16<0>(a11, uf); - - r = r00.lerp16<0>(r10, vf); - g = g00.lerp16<0>(g10, vf); - b = b00.lerp16<0>(b10, vf); - a = a00.lerp16<0>(a10, vf); - - test |= (r | g | b | a).eq16(GSVector4i::zero()); // mask out blank pixels (not perfect) - - a = a.gt16(GSVector4i::zero()); - } - else - { - if(sel.twin) - { - u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u); - v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v); - } - else - { - u0 = u0.min_i16(m_local.twin[2].u); - v0 = v0.min_i16(m_local.twin[2].v); - } - - addr00 = v0.sll16(8) | u0; - - // TODO - - if(sel.tlu) - { - c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut); - } - else - { - c00 = addr00.gather16_16((const uint16*)m_global.vm); - } - - r = (c00 << 3) & 0x00f800f8; - g = (c00 >> 2) & 0x00f800f8; - b = (c00 >> 7) & 0x00f800f8; - a = c00.sra16(15); - - test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels - } - } - - // ColorTFX - - switch(sel.tfx) - { - case 0: // none (tfx = 0) - case 1: // none (tfx = tge) - r = rf.srl16(7); - g = gf.srl16(7); - b = bf.srl16(7); - break; - case 2: // 
modulate (tfx = tme | tge) - r = r.modulate16<1>(rf).clamp8(); - g = g.modulate16<1>(gf).clamp8(); - b = b.modulate16<1>(bf).clamp8(); - break; - case 3: // decal (tfx = tme) - break; - default: - __assume(0); - } - - // AlphaBlend - - if(sel.abe) - { - GSVector4i rs = r; - GSVector4i gs = g; - GSVector4i bs = b; - GSVector4i rd = (fd & 0x001f001f) << 3; - GSVector4i gd = (fd & 0x03e003e0) >> 2; - GSVector4i bd = (fd & 0x7c007c00) >> 7; - - switch(sel.abr) - { - case 0: - r = rd.avg8(rs); - g = gd.avg8(gs); - b = bd.avg8(bs); - break; - case 1: - r = rd.addus8(rs); - g = gd.addus8(gs); - b = bd.addus8(bs); - break; - case 2: - r = rd.subus8(rs); - g = gd.subus8(gs); - b = bd.subus8(bs); - break; - case 3: - r = rd.addus8(rs.srl16(2)); - g = gd.addus8(gs.srl16(2)); - b = bd.addus8(bs.srl16(2)); - break; - default: - __assume(0); - } - - if(sel.tme) - { - r = rs.blend8(rd, a); - g = gs.blend8(gd, a); - b = bs.blend8(bd, a); - } - } - - // Dither - - if(sel.dtd) - { - r = r.addus8(dither); - g = g.addus8(dither); - b = b.addus8(dither); - } - - // WriteFrame - - GSVector4i fs = r | g | b | (sel.md ? GSVector4i(0x80008000) : sel.tme ? 
a : GSVector4i::zero()); - - fs = fs.blend8(fd, test); - - GSVector4i::store(fb, fb + 8, fs); - } - while(0); - - if(steps <= 0) break; - - steps -= 8; - - fb += 8; - - if(sel.tme) - { - GSVector4i st = m_local.d8.st; - - s = s.add16(st.xxxx()); - t = t.add16(st.yyyy()); - } - - if(sel.tfx != 3) // != decal - { - if(sel.iip) - { - GSVector4i c = m_local.d8.c; - - rf = rf.add16(c.xxxx()); - gf = gf.add16(c.yyyy()); - bf = bf.add16(c.zzzz()); - } - } - } -} - -void GPUDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) -{ - ASSERT(0); -} - -void GPUDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) -{ - // TODO -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GPUDrawScanline.h b/plugins/GSdx_legacy/GPUDrawScanline.h deleted file mode 100644 index d7c7e26155..0000000000 --- a/plugins/GSdx_legacy/GPUDrawScanline.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPUState.h" -#include "GSRasterizer.h" -#include "GPUScanlineEnvironment.h" -#include "GPUSetupPrimCodeGenerator.h" -#include "GPUDrawScanlineCodeGenerator.h" - -class GPUDrawScanline : public IDrawScanline -{ -public: - class SharedData : public GSRasterizerData - { - public: - GPUScanlineGlobalData global; - - public: - SharedData() - { - global.clut = NULL; - } - - virtual ~SharedData() - { - if(global.clut) _aligned_free(global.clut); - } - }; - -protected: - GPUScanlineGlobalData m_global; - GPUScanlineLocalData m_local; - - GSCodeGeneratorFunctionMap m_sp_map; - GSCodeGeneratorFunctionMap m_ds_map; - -public: - GPUDrawScanline(); - virtual ~GPUDrawScanline(); - - // IDrawScanline - - void BeginDraw(const GSRasterizerData* data); - void EndDraw(uint64 frame, uint64 ticks, int actual, int total); - -#ifndef ENABLE_JIT_RASTERIZER - - void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan); - void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); - void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); - void DrawRect(const GSVector4i& r, const GSVertexSW& v); - -#endif - - void PrintStats() {m_ds_map.PrintStats();} -}; diff --git a/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.cpp b/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.cpp deleted file mode 100644 index c92e28d7c3..0000000000 --- a/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.cpp +++ /dev/null @@ -1,1031 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -// TODO: x64 - -#include "stdafx.h" -#include "GPUDrawScanlineCodeGenerator.h" -#include "GSVertexSW.h" - -static const int _args = 8; -static const int _top = _args + 4; -static const int _v = _args + 8; - -GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) - , m_local(*(GPUScanlineLocalData*)param) -{ - m_sel.key = key; - - Generate(); -} - -void GPUDrawScanlineCodeGenerator::Generate() -{ - push(esi); - push(edi); - - Init(); - - align(16); - -L("loop"); - - // GSVector4i test = m_test[7 + (steps & (steps >> 31))]; - - mov(edx, ecx); - sar(edx, 31); - and(edx, ecx); - shl(edx, 4); - - movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); - - // movdqu(xmm1, ptr[edi]); - - movq(xmm1, qword[edi]); - movhps(xmm1, qword[edi + 8]); - - // ecx = steps - // esi = tex (tme) - // edi = fb - // xmm1 = fd - // xmm2 = s - // xmm3 = t - // xmm4 = r - // xmm5 = g - // xmm6 = b - // xmm7 = test - - TestMask(); - - SampleTexture(); - - // xmm1 = fd - // xmm3 = a - // xmm4 = r - // xmm5 = g - // xmm6 = b - // xmm7 = test - // xmm0, xmm2 = free - - ColorTFX(); - - AlphaBlend(); - - Dither(); - - WriteFrame(); - -L("step"); - - // if(steps <= 0) break; - - test(ecx, ecx); - jle("exit", T_NEAR); - - Step(); - - jmp("loop", T_NEAR); - -L("exit"); - - pop(edi); - pop(esi); - - ret(8); -} - -void GPUDrawScanlineCodeGenerator::Init() -{ - mov(eax, dword[esp + 
_top]); - - // uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left; - - mov(edi, eax); - shl(edi, 10 + m_sel.scalex); - add(edi, edx); - lea(edi, ptr[edi * 2 + (size_t)m_local.gd->vm]); - - // int steps = pixels - 8; - - sub(ecx, 8); - - if(m_sel.dtd) - { - // dither = GSVector4i::load(&m_dither[top & 3][left & 3]); - - and(eax, 3); - shl(eax, 5); - and(edx, 3); - shl(edx, 1); - movdqu(xmm0, ptr[eax + edx + (size_t)m_dither]); - movdqa(ptr[&m_local.temp.dither], xmm0); - } - - mov(edx, dword[esp + _v]); - - if(m_sel.tme) - { - mov(esi, dword[&m_local.gd->tex]); - - // GSVector4i vt = GSVector4i(v.t).xxzzl(); - - cvttps2dq(xmm4, ptr[edx + offsetof(GSVertexSW, t)]); - pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); - - // s = vt.xxxx().add16(m_local.d.s); - // t = vt.yyyy().add16(m_local.d.t); - - pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - - paddw(xmm2, ptr[&m_local.d.s]); - - if(!m_sel.sprite) - { - paddw(xmm3, ptr[&m_local.d.t]); - } - else - { - if(m_sel.ltf) - { - movdqa(xmm0, xmm3); - psllw(xmm0, 8); - psrlw(xmm0, 1); - movdqa(ptr[&m_local.temp.vf], xmm0); - } - } - - movdqa(ptr[&m_local.temp.s], xmm2); - movdqa(ptr[&m_local.temp.t], xmm3); - } - - if(m_sel.tfx != 3) // != decal - { - // GSVector4i vc = GSVector4i(v.c).xxzzlh(); - - cvttps2dq(xmm6, ptr[edx + offsetof(GSVertexSW, c)]); - pshuflw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - - // r = vc.xxxx(); - // g = vc.yyyy(); - // b = vc.zzzz(); - - pshufd(xmm4, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1)); - pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); - - if(m_sel.iip) - { - // r = r.add16(m_local.d.r); - // g = g.add16(m_local.d.g); - // b = b.add16(m_local.d.b); - - paddw(xmm4, ptr[&m_local.d.r]); - paddw(xmm5, ptr[&m_local.d.g]); - paddw(xmm6, ptr[&m_local.d.b]); - } - - movdqa(ptr[&m_local.temp.r], xmm4); - movdqa(ptr[&m_local.temp.g], xmm5); - 
movdqa(ptr[&m_local.temp.b], xmm6); - } -} - -void GPUDrawScanlineCodeGenerator::Step() -{ - // steps -= 8; - - sub(ecx, 8); - - // fb += 8; - - add(edi, 8 * sizeof(uint16)); - - if(m_sel.tme) - { - // GSVector4i st = m_local.d8.st; - - movdqa(xmm4, ptr[&m_local.d8.st]); - - // s = s.add16(st.xxxx()); - // t = t.add16(st.yyyy()); - - pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - paddw(xmm2, ptr[&m_local.temp.s]); - movdqa(ptr[&m_local.temp.s], xmm2); - - // TODO: if(!sprite) ... else reload t - - pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - paddw(xmm3, ptr[&m_local.temp.t]); - movdqa(ptr[&m_local.temp.t], xmm3); - } - - if(m_sel.tfx != 3) // != decal - { - if(m_sel.iip) - { - // GSVector4i c = m_local.d8.c; - - // r = r.add16(c.xxxx()); - // g = g.add16(c.yyyy()); - // b = b.add16(c.zzzz()); - - movdqa(xmm6, ptr[&m_local.d8.c]); - - pshufd(xmm4, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1)); - pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); - - paddw(xmm4, ptr[&m_local.temp.r]); - paddw(xmm5, ptr[&m_local.temp.g]); - paddw(xmm6, ptr[&m_local.temp.b]); - - movdqa(ptr[&m_local.temp.r], xmm4); - movdqa(ptr[&m_local.temp.g], xmm5); - movdqa(ptr[&m_local.temp.b], xmm6); - } - else - { - movdqa(xmm4, ptr[&m_local.temp.r]); - movdqa(xmm5, ptr[&m_local.temp.g]); - movdqa(xmm6, ptr[&m_local.temp.b]); - } - } -} - -void GPUDrawScanlineCodeGenerator::TestMask() -{ - if(!m_sel.me) - { - return; - } - - // test |= fd.sra16(15); - - movdqa(xmm0, xmm1); - psraw(xmm0, 15); - por(xmm7, xmm0); - - alltrue(); -} - -void GPUDrawScanlineCodeGenerator::SampleTexture() -{ - if(!m_sel.tme) - { - return; - } - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - // xmm2 = s - // xmm3 = t - // xmm7 = test - // xmm0, xmm4, xmm5, xmm6 = free - // xmm1 = used - - if(m_sel.ltf) - { - // GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f - // GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f - - mov(eax, 0x00200020); - movd(xmm0, 
eax); - pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - - psubw(xmm2, xmm0); - psubw(xmm3, xmm0); - - // GSVector4i uf = (u & GSVector4i::x00ff()) << 7; - // GSVector4i vf = (v & GSVector4i::x00ff()) << 7; - - movdqa(xmm0, xmm2); - psllw(xmm0, 8); - psrlw(xmm0, 1); - movdqa(ptr[&m_local.temp.uf], xmm0); - - if(!m_sel.sprite) - { - movdqa(xmm0, xmm3); - psllw(xmm0, 8); - psrlw(xmm0, 1); - movdqa(ptr[&m_local.temp.vf], xmm0); - } - } - - // GSVector4i u0 = s.srl16(8); - // GSVector4i v0 = t.srl16(8); - - psrlw(xmm2, 8); - psrlw(xmm3, 8); - - // xmm2 = u - // xmm3 = v - // xmm7 = test - // xmm0, xmm4, xmm5, xmm6 = free - // xmm1 = used - - if(m_sel.ltf) - { - // GSVector4i u1 = u0.add16(GSVector4i::x0001()); - // GSVector4i v1 = v0.add16(GSVector4i::x0001()); - - movdqa(xmm4, xmm2); - movdqa(xmm5, xmm3); - - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 15); - paddw(xmm4, xmm0); - paddw(xmm5, xmm0); - - if(m_sel.twin) - { - // u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u); - // v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v); - // u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u); - // v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v); - - movdqa(xmm0, ptr[&m_local.twin[0].u]); - movdqa(xmm6, ptr[&m_local.twin[1].u]); - - pand(xmm2, xmm0); - paddw(xmm2, xmm6); - pand(xmm4, xmm0); - paddw(xmm4, xmm6); - - movdqa(xmm0, ptr[&m_local.twin[0].v]); - movdqa(xmm6, ptr[&m_local.twin[1].v]); - - pand(xmm3, xmm0); - paddw(xmm3, xmm6); - pand(xmm5, xmm0); - paddw(xmm5, xmm6); - } - else - { - // u0 = u0.min_i16(m_local.twin[2].u); - // v0 = v0.min_i16(m_local.twin[2].v); - // u1 = u1.min_i16(m_local.twin[2].u); - // v1 = v1.min_i16(m_local.twin[2].v); - - // TODO: if(!sprite) clamp16 else: - - movdqa(xmm0, ptr[&m_local.twin[2].u]); - movdqa(xmm6, ptr[&m_local.twin[2].v]); - - pminsw(xmm2, xmm0); - pminsw(xmm3, xmm6); - pminsw(xmm4, xmm0); - pminsw(xmm5, xmm6); - } - - // xmm2 = u0 - // xmm3 = v0 - // xmm4 = u1 - // xmm5 = v1 - // xmm7 = test - // xmm0, xmm6 = free 
- // xmm1 = used - - // GSVector4i addr00 = v0.sll16(8) | u0; - // GSVector4i addr01 = v0.sll16(8) | u1; - // GSVector4i addr10 = v1.sll16(8) | u0; - // GSVector4i addr11 = v1.sll16(8) | u1; - - psllw(xmm3, 8); - movdqa(xmm0, xmm3); - por(xmm3, xmm2); - por(xmm0, xmm4); - - psllw(xmm5, 8); - movdqa(xmm6, xmm5); - por(xmm5, xmm2); - por(xmm6, xmm4); - - // xmm3 = addr00 - // xmm0 = addr01 - // xmm5 = addr10 - // xmm6 = addr11 - // xmm7 = test - // xmm2, xmm4 = free - // xmm1 = used - - ReadTexel(xmm2, xmm3); - ReadTexel(xmm4, xmm0); - ReadTexel(xmm3, xmm5); - ReadTexel(xmm5, xmm6); - - // xmm2 = c00 - // xmm4 = c01 - // xmm3 = c10 - // xmm5 = c11 - // xmm7 = test - // xmm0, xmm6 = free - // xmm1 = used - - // spill (TODO) - - movdqa(ptr[&m_local.temp.fd], xmm1); - movdqa(ptr[&m_local.temp.test], xmm7); - - // xmm2 = c00 - // xmm4 = c01 - // xmm3 = c10 - // xmm5 = c11 - // xmm0, xmm1, xmm6, xmm7 = free - - movdqa(xmm1, xmm2); - psllw(xmm1, 11); - psrlw(xmm1, 8); - - movdqa(xmm0, xmm4); - psllw(xmm0, 11); - psrlw(xmm0, 8); - - lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.uf]); - - movdqa(xmm6, xmm2); - psllw(xmm6, 6); - psrlw(xmm6, 11); - psllw(xmm6, 3); - - movdqa(xmm1, xmm4); - psllw(xmm1, 6); - psrlw(xmm1, 11); - psllw(xmm1, 3); - - lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.uf]); - - movdqa(xmm7, xmm2); - psllw(xmm7, 1); - psrlw(xmm7, 11); - psllw(xmm7, 3); - - movdqa(xmm6, xmm4); - psllw(xmm6, 1); - psrlw(xmm6, 11); - psllw(xmm6, 3); - - lerp16<0>(xmm6, xmm7, ptr[&m_local.temp.uf]); - - psraw(xmm2, 15); - psrlw(xmm2, 8); - psraw(xmm4, 15); - psrlw(xmm4, 8); - - lerp16<0>(xmm4, xmm2, ptr[&m_local.temp.uf]); - - // xmm0 = r00 - // xmm1 = g00 - // xmm6 = b00 - // xmm4 = a00 - // xmm3 = c10 - // xmm5 = c11 - // xmm2, xmm7 = free - - movdqa(xmm7, xmm3); - psllw(xmm7, 11); - psrlw(xmm7, 8); - - movdqa(xmm2, xmm5); - psllw(xmm2, 11); - psrlw(xmm2, 8); - - lerp16<0>(xmm2, xmm7, ptr[&m_local.temp.uf]); - lerp16<0>(xmm2, xmm0, ptr[&m_local.temp.vf]); - - // xmm2 = r - // xmm1 = 
g00 - // xmm6 = b00 - // xmm4 = a00 - // xmm3 = c10 - // xmm5 = c11 - // xmm0, xmm7 = free - - movdqa(xmm7, xmm3); - psllw(xmm7, 6); - psrlw(xmm7, 11); - psllw(xmm7, 3); - - movdqa(xmm0, xmm5); - psllw(xmm0, 6); - psrlw(xmm0, 11); - psllw(xmm0, 3); - - lerp16<0>(xmm0, xmm7, ptr[&m_local.temp.uf]); - lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.vf]); - - // xmm2 = r - // xmm0 = g - // xmm6 = b00 - // xmm4 = a00 - // xmm3 = c10 - // xmm5 = c11 - // xmm1, xmm7 = free - - movdqa(xmm7, xmm3); - psllw(xmm7, 1); - psrlw(xmm7, 11); - psllw(xmm7, 3); - - movdqa(xmm1, xmm5); - psllw(xmm1, 1); - psrlw(xmm1, 11); - psllw(xmm1, 3); - - lerp16<0>(xmm1, xmm7, ptr[&m_local.temp.uf]); - lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.vf]); - - // xmm2 = r - // xmm0 = g - // xmm1 = b - // xmm4 = a00 - // xmm3 = c10 - // xmm5 = c11 - // xmm6, xmm7 = free - - psraw(xmm3, 15); - psrlw(xmm3, 8); - psraw(xmm5, 15); - psrlw(xmm5, 8); - - lerp16<0>(xmm5, xmm3, ptr[&m_local.temp.uf]); - lerp16<0>(xmm5, xmm4, ptr[&m_local.temp.vf]); - - // xmm2 = r - // xmm0 = g - // xmm1 = b - // xmm5 = a - // xmm3, xmm4, xmm6, xmm7 = free - - // TODO - movdqa(xmm3, xmm5); // a - movdqa(xmm4, xmm2); // r - movdqa(xmm6, xmm1); // b - movdqa(xmm5, xmm0); // g - - // reload test - - movdqa(xmm7, ptr[&m_local.temp.test]); - - // xmm4 = r - // xmm5 = g - // xmm6 = b - // xmm3 = a - // xmm7 = test - // xmm0, xmm1, xmm2 = free - - // test |= (c[0] | c[1] | c[2] | c[3]).eq16(GSVector4i::zero()); // mask out blank pixels (not perfect) - - movdqa(xmm1, xmm3); - por(xmm1, xmm4); - movdqa(xmm2, xmm5); - por(xmm2, xmm6); - por(xmm1, xmm2); - - pxor(xmm0, xmm0); - pcmpeqw(xmm1, xmm0); - por(xmm7, xmm1); - - // a = a.gt16(GSVector4i::zero()); - - pcmpgtw(xmm3, xmm0); - - // reload fd - - movdqa(xmm1, ptr[&m_local.temp.fd]); - } - else - { - if(m_sel.twin) - { - // u = (u & m_local.twin[0].u).add16(m_local.twin[1].u); - // v = (v & m_local.twin[0].v).add16(m_local.twin[1].v); - - pand(xmm2, ptr[&m_local.twin[0].u]); - paddw(xmm2, 
ptr[&m_local.twin[1].u]); - pand(xmm3, ptr[&m_local.twin[0].v]); - paddw(xmm3, ptr[&m_local.twin[1].v]); - } - else - { - // u = u.min_i16(m_local.twin[2].u); - // v = v.min_i16(m_local.twin[2].v); - - // TODO: if(!sprite) clamp16 else: - - pminsw(xmm2, ptr[&m_local.twin[2].u]); - pminsw(xmm3, ptr[&m_local.twin[2].v]); - } - - // xmm2 = u - // xmm3 = v - // xmm7 = test - // xmm0, xmm4, xmm5, xmm6 = free - // xmm1 = used - - // GSVector4i addr = v.sll16(8) | u; - - psllw(xmm3, 8); - por(xmm3, xmm2); - - // xmm3 = addr - // xmm7 = test - // xmm0, xmm2, xmm4, xmm5, xmm6 = free - // xmm1 = used - - ReadTexel(xmm6, xmm3); - - // xmm3 = c00 - // xmm7 = test - // xmm0, xmm2, xmm4, xmm5, xmm6 = free - // xmm1 = used - - // test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels - - pxor(xmm0, xmm0); - pcmpeqw(xmm0, xmm6); - por(xmm7, xmm0); - - // c[0] = (c00 << 3) & 0x00f800f8; - // c[1] = (c00 >> 2) & 0x00f800f8; - // c[2] = (c00 >> 7) & 0x00f800f8; - // c[3] = c00.sra16(15); - - movdqa(xmm3, xmm6); - psraw(xmm3, 15); // a - - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 11); - psllw(xmm0, 3); // 0x00f8 - - movdqa(xmm4, xmm6); - psllw(xmm4, 3); - pand(xmm4, xmm0); // r - - movdqa(xmm5, xmm6); - psrlw(xmm5, 2); - pand(xmm5, xmm0); // g - - psrlw(xmm6, 7); - pand(xmm6, xmm0); // b - } -} - -void GPUDrawScanlineCodeGenerator::ColorTFX() -{ - switch(m_sel.tfx) - { - case 0: // none (tfx = 0) - case 1: // none (tfx = tge) - // c[0] = r.srl16(7); - // c[1] = g.srl16(7); - // c[2] = b.srl16(7); - psrlw(xmm4, 7); - psrlw(xmm5, 7); - psrlw(xmm6, 7); - break; - case 2: // modulate (tfx = tme | tge) - // c[0] = c[0].modulate16<1>(r).clamp8(); - // c[1] = c[1].modulate16<1>(g).clamp8(); - // c[2] = c[2].modulate16<1>(b).clamp8(); - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 8); - modulate16<1>(xmm4, ptr[&m_local.temp.r]); - pminsw(xmm4, xmm0); - modulate16<1>(xmm5, ptr[&m_local.temp.g]); - pminsw(xmm5, xmm0); - modulate16<1>(xmm6, ptr[&m_local.temp.b]); - pminsw(xmm6, xmm0); - break; - case 3: 
// decal (tfx = tme) - break; - } -} - -void GPUDrawScanlineCodeGenerator::AlphaBlend() -{ - if(!m_sel.abe) - { - return; - } - - // xmm1 = fd - // xmm3 = a - // xmm4 = r - // xmm5 = g - // xmm6 = b - // xmm7 = test - // xmm0, xmm2 = free - - // GSVector4i r = (fd & 0x001f001f) << 3; - - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 11); // 0x001f - movdqa(xmm2, xmm1); - pand(xmm2, xmm0); - psllw(xmm2, 3); - - switch(m_sel.abr) - { - case 0: - // r = r.avg8(c[0]); - pavgb(xmm2, xmm4); - break; - case 1: - // r = r.addus8(c[0]); - paddusb(xmm2, xmm4); - break; - case 2: - // r = r.subus8(c[0]); - psubusb(xmm2, xmm4); - break; - case 3: - // r = r.addus8(c[0].srl16(2)); - movdqa(xmm0, xmm4); - psrlw(xmm0, 2); - paddusb(xmm2, xmm0); - break; - } - - if(m_sel.tme) - { - movdqa(xmm0, xmm3); - blend8(xmm4, xmm2); - } - else - { - movdqa(xmm4, xmm2); - } - - // GSVector4i g = (d & 0x03e003e0) >> 2; - - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 11); - psllw(xmm0, 5); // 0x03e0 - movdqa(xmm2, xmm1); - pand(xmm2, xmm0); - psrlw(xmm2, 2); - - switch(m_sel.abr) - { - case 0: - // g = g.avg8(c[2]); - pavgb(xmm2, xmm5); - break; - case 1: - // g = g.addus8(c[2]); - paddusb(xmm2, xmm5); - break; - case 2: - // g = g.subus8(c[2]); - psubusb(xmm2, xmm5); - break; - case 3: - // g = g.addus8(c[2].srl16(2)); - movdqa(xmm0, xmm5); - psrlw(xmm0, 2); - paddusb(xmm2, xmm0); - break; - } - - if(m_sel.tme) - { - movdqa(xmm0, xmm3); - blend8(xmm5, xmm2); - } - else - { - movdqa(xmm5, xmm2); - } - - // GSVector4i b = (d & 0x7c007c00) >> 7; - - pcmpeqd(xmm0, xmm0); - psrlw(xmm0, 11); - psllw(xmm0, 10); // 0x7c00 - movdqa(xmm2, xmm1); - pand(xmm2, xmm0); - psrlw(xmm2, 7); - - switch(m_sel.abr) - { - case 0: - // b = b.avg8(c[2]); - pavgb(xmm2, xmm6); - break; - case 1: - // b = b.addus8(c[2]); - paddusb(xmm2, xmm6); - break; - case 2: - // b = b.subus8(c[2]); - psubusb(xmm2, xmm6); - break; - case 3: - // b = b.addus8(c[2].srl16(2)); - movdqa(xmm0, xmm6); - psrlw(xmm0, 2); - paddusb(xmm2, xmm0); - break; - } - 
- if(m_sel.tme) - { - movdqa(xmm0, xmm3); - blend8(xmm6, xmm2); - } - else - { - movdqa(xmm6, xmm2); - } -} - -void GPUDrawScanlineCodeGenerator::Dither() -{ - if(!m_sel.dtd) - { - return; - } - - // c[0] = c[0].addus8(dither); - // c[1] = c[1].addus8(dither); - // c[2] = c[2].addus8(dither); - - movdqa(xmm0, ptr[&m_local.temp.dither]); - - paddusb(xmm4, xmm0); - paddusb(xmm5, xmm0); - paddusb(xmm6, xmm0); -} - -void GPUDrawScanlineCodeGenerator::WriteFrame() -{ - // GSVector4i fs = r | g | b | (m_sel.md ? GSVector4i(0x80008000) : m_sel.tme ? a : 0); - - pcmpeqd(xmm0, xmm0); - - if(m_sel.md || m_sel.tme) - { - movdqa(xmm2, xmm0); - psllw(xmm2, 15); - } - - psrlw(xmm0, 11); - psllw(xmm0, 3); - - // xmm0 = 0x00f8 - // xmm2 = 0x8000 (md) - - // GSVector4i r = (c[0] & 0x00f800f8) >> 3; - - pand(xmm4, xmm0); - psrlw(xmm4, 3); - - // GSVector4i g = (c[1] & 0x00f800f8) << 2; - - pand(xmm5, xmm0); - psllw(xmm5, 2); - por(xmm4, xmm5); - - // GSVector4i b = (c[2] & 0x00f800f8) << 7; - - pand(xmm6, xmm0); - psllw(xmm6, 7); - por(xmm4, xmm6); - - if(m_sel.md) - { - // GSVector4i a = GSVector4i(0x80008000); - - por(xmm4, xmm2); - } - else if(m_sel.tme) - { - // GSVector4i a = (c[3] << 8) & 0x80008000; - - psllw(xmm3, 8); - pand(xmm3, xmm2); - por(xmm4, xmm3); - } - - // fs = fs.blend8(fd, test); - - movdqa(xmm0, xmm7); - blend8(xmm4, xmm1); - - // GSVector4i::store(fb, fs); - - // movdqu(ptr[edi], xmm4); - - movq(qword[edi], xmm4); - movhps(qword[edi + 8], xmm4); -} - -void GPUDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr) -{ - for(int i = 0; i < 8; i++) - { - pextrw(eax, addr, (uint8)i); - - if(m_sel.tlu) movzx(eax, byte[esi + eax]); - - const Address& src = m_sel.tlu ? 
ptr[edx + eax * 2] : ptr[esi + eax * 2]; - - if(i == 0) movd(dst, src); - else pinsrw(dst, src, (uint8)i); - } -} - -template -void GPUDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f) -{ - if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3)) - { - pmulhrsw(a, f); - } - else - { - psllw(a, shift + 1); - pmulhw(a, f); - } -} - -template -void GPUDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Operand& f) -{ - psubw(a, b); - modulate16(a, f); - paddw(a, b); -} - -void GPUDrawScanlineCodeGenerator::alltrue() -{ - pmovmskb(eax, xmm7); - cmp(eax, 0xffff); - je("step", T_NEAR); -} - -void GPUDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b) -{ - if(m_cpu.has(util::Cpu::tSSE41)) - { - pblendvb(a, b); - } - else - { - blend(a, b, xmm0); - } -} - -void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask) -{ - pand(b, mask); - pandn(mask, a); - por(b, mask); - movdqa(a, b); -} - -const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] = -{ - GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000), - GSVector4i::zero(), -}; - -__aligned(const uint16, 32) GPUDrawScanlineCodeGenerator::m_dither[4][16] = -{ - {7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1}, - {2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4}, - {1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7}, - {4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2}, -}; diff --git a/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.h b/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.h deleted file mode 100644 index 00eff14c6d..0000000000 --- 
a/plugins/GSdx_legacy/GPUDrawScanlineCodeGenerator.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPUScanlineEnvironment.h" -#include "GSFunctionMap.h" - -using namespace Xbyak; - -class GPUDrawScanlineCodeGenerator : public GSCodeGenerator -{ - void operator = (const GPUDrawScanlineCodeGenerator&); - - GPUScanlineSelector m_sel; - GPUScanlineLocalData& m_local; - - void Generate(); - - void Init(); - void Step(); - void TestMask(); - void SampleTexture(); - void ColorTFX(); - void AlphaBlend(); - void Dither(); - void WriteFrame(); - - void ReadTexel(const Xmm& dst, const Xmm& addr); - - template void modulate16(const Xmm& a, const Operand& f); - template void lerp16(const Xmm& a, const Xmm& b, const Operand& f); - void alltrue(); - void blend8(const Xmm& a, const Xmm& b); - void blend(const Xmm& a, const Xmm& b, const Xmm& mask); - -public: - GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); - - static const GSVector4i m_test[8]; - static __aligned(const uint16, 32) m_dither[4][16]; -}; \ No newline at end of file diff --git a/plugins/GSdx_legacy/GPUDrawingEnvironment.h 
b/plugins/GSdx_legacy/GPUDrawingEnvironment.h deleted file mode 100644 index 674bfb2830..0000000000 --- a/plugins/GSdx_legacy/GPUDrawingEnvironment.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPU.h" - -__aligned(class, 32) GPUDrawingEnvironment -{ -public: - GPURegSTATUS STATUS; - GPURegPRIM PRIM; - GPURegDAREA DAREA; - GPURegDHRANGE DHRANGE; - GPURegDVRANGE DVRANGE; - GPURegDRAREA DRAREATL; - GPURegDRAREA DRAREABR; - GPURegDROFF DROFF; - GPURegTWIN TWIN; - GPURegCLUT CLUT; - - GPUDrawingEnvironment() - { - Reset(); - } - - void Reset() - { - memset(this, 0, sizeof(*this)); - - STATUS.IDLE = 1; - STATUS.COM = 1; - STATUS.WIDTH0 = 1; - DVRANGE.Y1 = 16; - DVRANGE.Y2 = 256; - } - - GSVector4i GetDisplayRect() - { - static int s_width[] = {256, 320, 512, 640, 368, 384, 512, 640}; - static int s_height[] = {240, 480}; - - GSVector4i r; - - r.left = DAREA.X & ~7; // FIXME - r.top = DAREA.Y; - r.right = r.left + s_width[(STATUS.WIDTH1 << 2) | STATUS.WIDTH0]; - r.bottom = r.top + (DVRANGE.Y2 - DVRANGE.Y1) * s_height[STATUS.HEIGHT] / 240; - - return r.rintersect(GSVector4i(0, 0, 1024, 512)); - } - - float GetFPS() - 
{ - return STATUS.ISPAL ? 50.0f : 59.94f; - } -}; \ No newline at end of file diff --git a/plugins/GSdx_legacy/GPULocalMemory.cpp b/plugins/GSdx_legacy/GPULocalMemory.cpp deleted file mode 100644 index 0218dcfc78..0000000000 --- a/plugins/GSdx_legacy/GPULocalMemory.cpp +++ /dev/null @@ -1,662 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GPULocalMemory.h" -#include "GSdx.h" - -const GSVector4i GPULocalMemory::m_xxxa(0x00008000); -const GSVector4i GPULocalMemory::m_xxbx(0x00007c00); -const GSVector4i GPULocalMemory::m_xgxx(0x000003e0); -const GSVector4i GPULocalMemory::m_rxxx(0x0000001f); - -#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16)) -#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2) -#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32) - -GPULocalMemory::GPULocalMemory() -{ - m_scale.x = std::min(std::max(theApp.GetConfig("scale_x", 0), 0), 2); - m_scale.y = std::min(std::max(theApp.GetConfig("scale_y", 0), 0), 2); - - // - - int size = VM_REAL_SIZE; - - m_vm = (uint16*)vmalloc(VM_ALLOC_SIZE, false); - - memset(m_vm, 0, size); - - // - - m_clut.buff = m_vm + size; - m_clut.dirty = true; - - // - - size = TEX_ALLOC_SIZE; - - m_texture.buff[0] = (uint8*)vmalloc(size, false); - m_texture.buff[1] = m_texture.buff[0] + 256 * 256 * 32; - m_texture.buff[2] = m_texture.buff[1] + 256 * 256 * 32; - - memset(m_texture.buff[0], 0, size); - - memset(m_texture.valid, 0, sizeof(m_texture.valid)); - - for(int y = 0, offset = 0; y < 2; y++) - { - for(int x = 0; x < 16; x++, offset += 256 * 256) - { - m_texture.page[0][y][x] = &((uint8*)m_texture.buff[0])[offset]; - m_texture.page[1][y][x] = &((uint8*)m_texture.buff[1])[offset]; - } - } - - for(int y = 0, offset = 0; y < 2; y++) - { - for(int x = 0; x < 16; x++, offset += 256 * 256) - { - m_texture.page[2][y][x] = &((uint32*)m_texture.buff[2])[offset]; - } - } -} - -GPULocalMemory::~GPULocalMemory() -{ - vmfree(m_vm, VM_ALLOC_SIZE); - - vmfree(m_texture.buff[0], TEX_ALLOC_SIZE); -} - -const uint16* GPULocalMemory::GetCLUT(int tp, int cx, int cy) -{ - if(m_clut.dirty || m_clut.tp != tp || m_clut.cx != cx || m_clut.cy != cy) - { - uint16* src = GetPixelAddressScaled(cx << 4, cy); - uint16* dst = m_clut.buff; - - if(m_scale.x == 0) - { - memcpy(dst, src, (tp == 0 ? 
16 : 256) * 2); - } - else if(m_scale.x == 1) - { - if(tp == 0) - { - for(int i = 0; i < 16; i++) - { - dst[i] = src[i * 2]; - } - } - else if(tp == 1) - { - for(int i = 0; i < 256; i++) - { - dst[i] = src[i * 2]; - } - } - } - else if(m_scale.x == 2) - { - if(tp == 0) - { - for(int i = 0; i < 16; i++) - { - dst[i] = src[i * 4]; - } - } - else if(tp == 1) - { - for(int i = 0; i < 256; i++) - { - dst[i] = src[i * 4]; - } - } - } - else - { - ASSERT(0); - } - - m_clut.tp = tp; - m_clut.cx = cx; - m_clut.cy = cy; - m_clut.dirty = false; - } - - return m_clut.buff; -} - -const void* GPULocalMemory::GetTexture(int tp, int tx, int ty) -{ - if(tp == 3) - { - ASSERT(0); - - return NULL; - } - - void* buff = m_texture.page[tp][ty][tx]; - - uint32 flag = 1 << tx; - - if((m_texture.valid[tp][ty] & flag) == 0) - { - // int bpp = 0; - - switch(tp) - { - case 0: - ReadPage4(tx, ty, (uint8*)buff); - // bpp = 4; - break; - case 1: - ReadPage8(tx, ty, (uint8*)buff); - // bpp = 8; - break; - case 2: - case 3: - ReadPage16(tx, ty, (uint16*)buff); - // bpp = 16; - default: - // FIXME: __assume(0); // vc9 generates bogus code in release mode - break; - } - - // TODO: m_state->m_perfmon.Put(GSPerfMon::Unswizzle, 256 * 256 * bpp >> 3); - - m_texture.valid[tp][ty] |= flag; - } - - return buff; -} - -void GPULocalMemory::Invalidate(const GSVector4i& r) -{ - if(!m_clut.dirty) - { - if(r.top <= m_clut.cy && m_clut.cy < r.bottom) - { - int left = m_clut.cx << 4; - int right = left + (m_clut.tp == 0 ? 
16 : 256); - - if(r.left < right && r.right > left) - { - m_clut.dirty = true; - } - } - } - - for(int y = 0, ye = min(r.bottom, 512), j = 0; y < ye; y += 256, j++) - { - if(r.top >= y + 256) continue; - - for(int x = 0, xe = min(r.right, 1024), i = 0; x < xe; x += 64, i++) - { - uint32 flag = 1 << i; - - if(r.left >= x + 256) continue; - - m_texture.valid[2][j] &= ~flag; - - if(r.left >= x + 128) continue; - - m_texture.valid[1][j] &= ~flag; - - if(r.left >= x + 64) continue; - - m_texture.valid[0][j] &= ~flag; - } - } -} - -void GPULocalMemory::FillRect(const GSVector4i& r, uint16 c) -{ - Invalidate(r); - - uint16* RESTRICT dst = GetPixelAddressScaled(r.left, r.top); - - int w = r.width() << m_scale.x; - int h = r.height() << m_scale.y; - - int pitch = GetWidth(); - - for(int j = 0; j < h; j++, dst += pitch) - { - for(int i = 0; i < w; i++) - { - dst[i] = c; - } - } -} - -void GPULocalMemory::WriteRect(const GSVector4i& r, const uint16* RESTRICT src) -{ - Invalidate(r); - - uint16* RESTRICT dst = GetPixelAddressScaled(r.left, r.top); - - int w = r.width(); - int h = r.height(); - - int pitch = GetWidth(); - - if(m_scale.x == 0) - { - for(int j = 0; j < h; j++, src += w) - { - for(int k = 1 << m_scale.y; k >= 1; k--, dst += pitch) - { - memcpy(dst, src, w * 2); - } - } - } - else if(m_scale.x == 1) - { - for(int j = 0; j < h; j++, src += w) - { - for(int k = 1 << m_scale.y; k >= 1; k--, dst += pitch) - { - for(int i = 0; i < w; i++) - { - dst[i * 2 + 0] = src[i]; - dst[i * 2 + 1] = src[i]; - } - } - } - } - else if(m_scale.x == 2) - { - for(int j = 0; j < h; j++, src += w) - { - for(int k = 1 << m_scale.y; k >= 1; k--, dst += pitch) - { - for(int i = 0; i < w; i++) - { - dst[i * 4 + 0] = src[i]; - dst[i * 4 + 1] = src[i]; - dst[i * 4 + 2] = src[i]; - dst[i * 4 + 3] = src[i]; - } - } - } - } - else - { - ASSERT(0); - } -} - -void GPULocalMemory::ReadRect(const GSVector4i& r, uint16* RESTRICT dst) -{ - uint16* RESTRICT src = GetPixelAddressScaled(r.left, r.top); - - 
int w = r.width(); - int h = r.height(); - - int pitch = GetWidth() << m_scale.y; - - if(m_scale.x == 0) - { - for(int j = 0; j < h; j++, src += pitch, dst += w) - { - memcpy(dst, src, w * 2); - } - } - else if(m_scale.x == 1) - { - for(int j = 0; j < h; j++, src += pitch, dst += w) - { - for(int i = 0; i < w; i++) - { - dst[i] = src[i * 2]; - } - } - } - else if(m_scale.x == 2) - { - for(int j = 0; j < h; j++, src += pitch, dst += w) - { - for(int i = 0; i < w; i++) - { - dst[i] = src[i * 4]; - } - } - } - else - { - ASSERT(0); - } -} - -void GPULocalMemory::MoveRect(int sx, int sy, int dx, int dy, int w, int h) -{ - Invalidate(GSVector4i(dx, dy, dx + w, dy + h)); - - uint16* s = GetPixelAddressScaled(sx, sy); - uint16* d = GetPixelAddressScaled(dx, dy); - - w <<= m_scale.x; - h <<= m_scale.y; - - int pitch = GetWidth(); - - for(int i = 0; i < h; i++, s += pitch, d += pitch) - { - memcpy(d, s, w * sizeof(uint16)); - } -} - -void GPULocalMemory::ReadPage4(int tx, int ty, uint8* RESTRICT dst) -{ - uint16* src = GetPixelAddressScaled(tx << 6, ty << 8); - - int pitch = GetWidth() << m_scale.y; - - if(m_scale.x == 0) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 64; i++) - { - dst[i * 4 + 0] = (src[i] >> 0) & 0xf; - dst[i * 4 + 1] = (src[i] >> 4) & 0xf; - dst[i * 4 + 2] = (src[i] >> 8) & 0xf; - dst[i * 4 + 3] = (src[i] >> 12) & 0xf; - } - } - } - else if(m_scale.x == 1) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 64; i++) - { - dst[i * 4 + 0] = (src[i * 2] >> 0) & 0xf; - dst[i * 4 + 1] = (src[i * 2] >> 4) & 0xf; - dst[i * 4 + 2] = (src[i * 2] >> 8) & 0xf; - dst[i * 4 + 3] = (src[i * 2] >> 12) & 0xf; - } - } - } - else if(m_scale.x == 2) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 64; i++) - { - dst[i * 4 + 0] = (src[i * 4] >> 0) & 0xf; - dst[i * 4 + 1] = (src[i * 4] >> 4) & 0xf; - dst[i * 4 + 2] = (src[i * 4] >> 8) & 0xf; - dst[i * 4 + 3] = (src[i 
* 4] >> 12) & 0xf; - } - } - } - else - { - ASSERT(0); - } -} - -void GPULocalMemory::ReadPage8(int tx, int ty, uint8* RESTRICT dst) -{ - uint16* src = GetPixelAddressScaled(tx << 6, ty << 8); - - int pitch = GetWidth() << m_scale.y; - - if(m_scale.x == 0) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - memcpy(dst, src, 256); - } - } - else if(m_scale.x == 1) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 128; i++) - { - ((uint16*)dst)[i] = src[i * 2]; - } - } - } - else if(m_scale.x == 2) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 128; i++) - { - ((uint16*)dst)[i] = src[i * 4]; - } - } - } - else - { - ASSERT(0); - } -} - -void GPULocalMemory::ReadPage16(int tx, int ty, uint16* RESTRICT dst) -{ - uint16* src = GetPixelAddressScaled(tx << 6, ty << 8); - - int pitch = GetWidth() << m_scale.y; - - if(m_scale.x == 0) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - memcpy(dst, src, 512); - } - } - else if(m_scale.x == 1) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 256; i++) - { - dst[i] = src[i * 2]; - } - } - } - else if(m_scale.x == 2) - { - for(int j = 0; j < 256; j++, src += pitch, dst += 256) - { - for(int i = 0; i < 256; i++) - { - dst[i] = src[i * 4]; - } - } - } - else - { - ASSERT(0); - } -} - -void GPULocalMemory::ReadFrame32(const GSVector4i& r, uint32* RESTRICT dst, bool rgb24) -{ - uint16* src = GetPixelAddress(r.left, r.top); - - int pitch = GetWidth(); - - if(rgb24) - { - for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch) - { - Expand24(src, dst, r.width()); - } - } - else - { - for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch) - { - Expand16(src, dst, r.width()); - } - } -} - -void GPULocalMemory::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels) -{ - GSVector4i rm = m_rxxx; - GSVector4i gm = m_xgxx; - GSVector4i bm = m_xxbx; - GSVector4i am = 
m_xxxa; - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - for(int i = 0, j = pixels >> 3; i < j; i++) - { - GSVector4i c = s[i]; - - GSVector4i l = c.upl16(); - GSVector4i h = c.uph16(); - - d[i * 2 + 0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | ((l & am) << 16); - d[i * 2 + 1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | ((h & am) << 16); - } -} - -void GPULocalMemory::Expand24(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels) -{ - uint8* s = (uint8*)src; - - if(m_scale.x == 0) - { - for(int i = 0; i < pixels; i += 2, s += 6) - { - dst[i + 0] = (s[2] << 16) | (s[1] << 8) | s[0]; - dst[i + 1] = (s[5] << 16) | (s[4] << 8) | s[3]; - } - } - else if(m_scale.x == 1) - { - for(int i = 0; i < pixels; i += 4, s += 12) - { - dst[i + 0] = dst[i + 1] = (s[4] << 16) | (s[1] << 8) | s[0]; - dst[i + 2] = dst[i + 3] = (s[9] << 16) | (s[8] << 8) | s[5]; - } - } - else if(m_scale.x == 2) - { - for(int i = 0; i < pixels; i += 8, s += 24) - { - dst[i + 0] = dst[i + 1] = dst[i + 2] = dst[i + 3] = (s[8] << 16) | (s[1] << 8) | s[0]; - dst[i + 4] = dst[i + 5] = dst[i + 6] = dst[i + 7] = (s[17] << 16) | (s[16] << 8) | s[9]; - } - } - else - { - ASSERT(0); - } -} - -#include "GSTextureSW.h" - -void GPULocalMemory::SaveBMP(const string& fn, const GSVector4i& r2, int tp, int cx, int cy) -{ - GSVector4i r; - - r.left = r2.left << m_scale.x; - r.top = r2.top << m_scale.y; - r.right = r2.right << m_scale.x; - r.bottom = r2.bottom << m_scale.y; - - r.left &= ~1; - r.right &= ~1; - - GSTextureSW t(GSTexture::Offscreen, r.width(), r.height()); - - GSTexture::GSMap m; - - if(t.Map(m, NULL)) - { - int pitch = GetWidth(); - - const uint16* RESTRICT src = GetPixelAddress(r.left, r.top); - const uint16* RESTRICT clut = GetCLUT(tp, cx, cy); - - uint8* RESTRICT dst = m.bits; - - uint16* RESTRICT buff = (uint16*)_aligned_malloc(pitch * sizeof(uint16), 32); - uint32* RESTRICT buff32 = (uint32*)_aligned_malloc(pitch * sizeof(uint32), 32); - - 
for(int j = r.top; j < r.bottom; j++, src += pitch, dst += m.pitch) - { - switch(tp) - { - case 0: // 4 bpp - - for(int i = 0, k = r.width() / 2; i < k; i++) - { - buff[i * 2 + 0] = clut[((uint8*)src)[i] & 0xf]; - buff[i * 2 + 1] = clut[((uint8*)src)[i] >> 4]; - } - - break; - - case 1: // 8 bpp - - for(int i = 0, k = r.width(); i < k; i++) - { - buff[i] = clut[((uint8*)src)[i]]; - } - - break; - - case 2: // 16 bpp; - - for(int i = 0, k = r.width(); i < k; i++) - { - buff[i] = src[i]; - } - - break; - - case 3: // 24 bpp - - // TODO - - break; - } - - Expand16(buff, buff32, r.width()); - - for(int i = 0, k = r.width(); i < k; i++) - { - buff32[i] = (buff32[i] & 0xff00ff00) | ((buff32[i] & 0x00ff0000) >> 16) | ((buff32[i] & 0x000000ff) << 16); - } - - memcpy(dst, buff32, r.width() << 2); - } - - _aligned_free(buff); - _aligned_free(buff32); - - t.Unmap(); - - t.Save(fn); - } -} diff --git a/plugins/GSdx_legacy/GPULocalMemory.h b/plugins/GSdx_legacy/GPULocalMemory.h deleted file mode 100644 index 4cd94d8a82..0000000000 --- a/plugins/GSdx_legacy/GPULocalMemory.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPU.h" -#include "GSVector.h" - -class GPULocalMemory -{ - static const GSVector4i m_xxxa; - static const GSVector4i m_xxbx; - static const GSVector4i m_xgxx; - static const GSVector4i m_rxxx; - - uint16* m_vm; - - struct - { - uint16* buff; - int tp, cx, cy; - bool dirty; - } m_clut; - - struct - { - uint8* buff[3]; - void* page[3][2][16]; - uint16 valid[3][2]; - } m_texture; - - GSVector2i m_scale; - -public: - GPULocalMemory(); - virtual ~GPULocalMemory(); - - GSVector2i GetScale() {return m_scale;} - - int GetWidth() {return 1 << (10 + m_scale.x);} - int GetHeight() {return 1 << (9 + m_scale.y);} - - uint16* GetPixelAddress(int x, int y) const {return &m_vm[(y << (10 + m_scale.x)) + x];} - uint16* GetPixelAddressScaled(int x, int y) const {return &m_vm[((y << m_scale.y) << (10 + m_scale.x)) + (x << m_scale.x)];} - - const uint16* GetCLUT(int tp, int cx, int cy); - const void* GetTexture(int tp, int tx, int ty); - - void Invalidate(const GSVector4i& r); - - void FillRect(const GSVector4i& r, uint16 c); - void WriteRect(const GSVector4i& r, const uint16* RESTRICT src); - void ReadRect(const GSVector4i& r, uint16* RESTRICT dst); - void MoveRect(int sx, int sy, int dx, int dy, int w, int h); - - void ReadPage4(int tx, int ty, uint8* RESTRICT dst); - void ReadPage8(int tx, int ty, uint8* RESTRICT dst); - void ReadPage16(int tx, int ty, uint16* RESTRICT dst); - - void ReadFrame32(const GSVector4i& r, uint32* RESTRICT dst, bool rgb24); - - void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels); - void Expand24(const uint16* RESTRICT src, uint32* RESTRICT dst, int pixels); - - void SaveBMP(const string& fn, const GSVector4i& r, int tp, int cx, int cy); -}; diff --git a/plugins/GSdx_legacy/GPURenderer.cpp b/plugins/GSdx_legacy/GPURenderer.cpp deleted file mode 100644 index 32338c5efb..0000000000 --- a/plugins/GSdx_legacy/GPURenderer.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* 
- * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GPURenderer.h" -#include "GSdx.h" - -#ifdef _WIN32 - -map GPURenderer::m_wnd2gpu; - -#endif - -GPURenderer::GPURenderer(GSDevice* dev) - : m_dev(dev) -{ - m_filter = theApp.GetConfig("filter", 0); - m_dither = theApp.GetConfig("dithering", 1); - m_aspectratio = theApp.GetConfig("AspectRatio", 1); - m_vsync = !!theApp.GetConfig("vsync", 0); - m_fxaa = !!theApp.GetConfig("fxaa", 0); - m_shaderfx = !!theApp.GetConfig("shaderfx", 0); - m_scale = m_mem.GetScale(); - m_shadeboost = !!theApp.GetConfig("ShadeBoost", 0); - - #ifdef _WIN32 - - m_hWnd = NULL; - m_wndproc = NULL; - - m_wnd = new GSWndDX(); - - #endif -} - -GPURenderer::~GPURenderer() -{ - #ifdef _WIN32 - - if(m_wndproc) - { - SetWindowLongPtr(m_hWnd, GWLP_WNDPROC, (LONG_PTR)m_wndproc); - - m_wnd2gpu.erase(m_hWnd); - } - - #endif -} - -bool GPURenderer::Create(void* hWnd) -{ - #ifdef _WIN32 - - // TODO: move subclassing inside GSWnd::Attach - - m_hWnd = (HWND)hWnd; - - m_wndproc = (WNDPROC)GetWindowLongPtr(m_hWnd, GWLP_WNDPROC); - - SetWindowLongPtr(m_hWnd, GWLP_WNDPROC, (LONG_PTR)WndProc); - - if(!m_wnd->Attach(m_hWnd)) - { - return false; - 
} - - m_wnd2gpu[m_hWnd] = this; - - SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) | WS_OVERLAPPEDWINDOW); - - #endif - - m_wnd->Show(); - - if(!m_dev->Create(m_wnd)) - { - return false; - } - - m_dev->SetVSync(m_vsync); - - Reset(); - - return true; -} - -bool GPURenderer::Merge() -{ - GSTexture* st[2] = {GetOutput(), NULL}; - - if(!st[0]) - { - return false; - } - - GSVector2i s = st[0]->GetSize(); - - GSVector4 sr[2]; - GSVector4 dr[2]; - - sr[0] = GSVector4(0, 0, 1, 1); - dr[0] = GSVector4(0, 0, s.x, s.y); - - m_dev->Merge(st, sr, dr, s, 1, 1, GSVector4(0, 0, 0, 1)); - - if(m_shadeboost) - { - m_dev->ShadeBoost(); - } - - if (m_shaderfx) - { - m_dev->ExternalFX(); - } - - if(m_fxaa) - { - m_dev->FXAA(); - } - - return true; -} - -void GPURenderer::VSync() -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - m_perfmon.Put(GSPerfMon::Frame); - - // m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ? - - #ifdef _WIN32 - - if(!IsWindow(m_hWnd)) return; - - #endif - - Flush(); - - if(!m_dev->IsLost(true)) - { - if(!Merge()) - { - return; - } - } - else - { - ResetDevice(); - } - - // osd - - if((m_perfmon.GetFrame() & 0x1f) == 0) - { - m_perfmon.Update(); - - double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); - - GSVector4i r = m_env.GetDisplayRect(); - - int w = r.width() << m_scale.x; - int h = r.height() << m_scale.y; - - string s = format( - "%lld | %d x %d | %.2f fps (%d%%) | %d/%d | %d%% CPU | %.2f | %.2f", - m_perfmon.GetFrame(), w, h, fps, (int)(100.0 * fps / m_env.GetFPS()), - (int)m_perfmon.Get(GSPerfMon::Prim), - (int)m_perfmon.Get(GSPerfMon::Draw), - m_perfmon.CPU(), - m_perfmon.Get(GSPerfMon::Swizzle) / 1024, - m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 - ); - - double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); - - if(fillrate > 0) - { - s = format("%s | %.2f mpps", s.c_str(), fps * fillrate / (1024 * 1024)); - } - - m_wnd->SetWindowText(s.c_str()); - } - - GSVector4i r = m_wnd->GetClientRect(); - - m_dev->Present(r.fit(m_aspectratio), 0); 
-} - -bool GPURenderer::MakeSnapshot(const string& path) -{ - time_t t = time(NULL); - - char buff[16]; - - if(!strftime(buff, sizeof(buff), "%Y%m%d%H%M%S", localtime(&t))) - { - return false; - } - - if(GSTexture* t = m_dev->GetCurrent()) - { - return t->Save(format("%s_%s.bmp", path.c_str(), buff)); - } - - return false; -} - -#ifdef _WIN32 - -LRESULT CALLBACK GPURenderer::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) -{ - map::iterator i = m_wnd2gpu.find(hWnd); - - if(i != m_wnd2gpu.end()) - { - return i->second->OnMessage(message, wParam, lParam); - } - - ASSERT(0); - - return 0; -} - -LRESULT GPURenderer::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) -{ - if(message == WM_KEYUP) - { - switch(wParam) - { - case VK_DELETE: - m_filter = (m_filter + 1) % 3; - return 0; - case VK_END: - m_dither = m_dither ? 0 : 1; - return 0; - case VK_NEXT: - m_aspectratio = (m_aspectratio + 1) % 3; - return 0; - case VK_PRIOR: - m_fxaa = !m_fxaa; - return 0; - case VK_HOME: - m_shaderfx = !m_shaderfx; - return 0; - } - } - - return CallWindowProc(m_wndproc, m_hWnd, message, wParam, lParam); -} - -#endif diff --git a/plugins/GSdx_legacy/GPURenderer.h b/plugins/GSdx_legacy/GPURenderer.h deleted file mode 100644 index 74fb9b2e7d..0000000000 --- a/plugins/GSdx_legacy/GPURenderer.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPUState.h" -#include "GSVertexList.h" -#include "GSDevice.h" -#ifdef _WIN32 -#include "GSWndDX.h" -#endif - -class GPURenderer : public GPUState -{ - bool Merge(); - -protected: - GSDevice* m_dev; - int m_filter; - int m_dither; - int m_aspectratio; - bool m_vsync; - bool m_shaderfx; - bool m_fxaa; - bool m_shadeboost; - GSVector2i m_scale; - - virtual void ResetDevice() {} - virtual GSTexture* GetOutput() = 0; - - #ifdef _WIN32 - - HWND m_hWnd; - WNDPROC m_wndproc; - static map m_wnd2gpu; - - static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); - LRESULT OnMessage(UINT message, WPARAM wParam, LPARAM lParam); - - #endif - - GSWnd* m_wnd; - -public: - GPURenderer(GSDevice* dev); - virtual ~GPURenderer(); - - virtual bool Create(void* hWnd); - virtual void VSync(); - virtual bool MakeSnapshot(const string& path); -}; - -template -class GPURendererT : public GPURenderer -{ -protected: - Vertex* m_vertices; - int m_count; - int m_maxcount; - GSVertexList m_vl; - - void Reset() - { - m_count = 0; - m_vl.RemoveAll(); - - GPURenderer::Reset(); - } - - void ResetPrim() - { - m_vl.RemoveAll(); - } - - void FlushPrim() - { - if(m_count > 0) - { - /* - Dump("db"); - - if(m_env.PRIM.TME) - { - GSVector4i r; - - r.left = m_env.STATUS.TX << 6; - r.top = m_env.STATUS.TY << 8; - r.right = r.left + 256; - r.bottom = r.top + 256; - - Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r.left, r.top, r.right, r.bottom).c_str(), m_env.STATUS.TP, r, false); - } - */ - - Draw(); - - m_count = 0; - - //Dump("dc", false); - } - } - - void GrowVertexBuffer() - { - int maxcount = std::max(m_maxcount * 3 / 2, 10000); - Vertex* vertices = 
(Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 32); - - if(vertices == NULL) - { - printf("GSdx: failed to allocate %d bytes for verticles.\n", (int)sizeof(Vertex) * maxcount); - throw GSDXError(); - } - - if(m_vertices != NULL) - { - memcpy(vertices, m_vertices, sizeof(Vertex) * m_maxcount); - _aligned_free(m_vertices); - } - - m_vertices = vertices; - m_maxcount = maxcount - 100; - } - - __forceinline Vertex* DrawingKick(int& count) - { - count = (int)m_env.PRIM.VTX; - - if(m_vl.GetCount() < count) - { - return NULL; - } - - if(m_count >= m_maxcount) - { - GrowVertexBuffer(); - } - - Vertex* v = &m_vertices[m_count]; - - switch(m_env.PRIM.TYPE) - { - case GPU_POLYGON: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAll(); - break; - case GPU_LINE: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - break; - case GPU_SPRITE: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - break; - default: - ASSERT(0); - m_vl.RemoveAll(); - return NULL; - } - - return v; - } - - virtual void VertexKick() = 0; - - virtual void Draw() = 0; - -public: - GPURendererT(GSDevice* dev) - : GPURenderer(dev) - , m_vertices(NULL) - , m_count(0) - , m_maxcount(0) - { - } - - virtual ~GPURendererT() - { - if(m_vertices) _aligned_free(m_vertices); - } -}; diff --git a/plugins/GSdx_legacy/GPURendererSW.cpp b/plugins/GSdx_legacy/GPURendererSW.cpp deleted file mode 100644 index addd2c4379..0000000000 --- a/plugins/GSdx_legacy/GPURendererSW.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GPURendererSW.h" -//#include "GSdx.h" - -GPURendererSW::GPURendererSW(GSDevice* dev, int threads) - : GPURendererT(dev) - , m_texture(NULL) -{ - m_output = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 32); - - m_rl = GSRasterizerList::Create(threads, &m_perfmon); -} - -GPURendererSW::~GPURendererSW() -{ - delete m_texture; - - delete m_rl; - - _aligned_free(m_output); -} - -void GPURendererSW::ResetDevice() -{ - delete m_texture; - - m_texture = NULL; -} - -GSTexture* GPURendererSW::GetOutput() -{ - GSVector4i r = m_env.GetDisplayRect(); - - r.left <<= m_scale.x; - r.top <<= m_scale.y; - r.right <<= m_scale.x; - r.bottom <<= m_scale.y; - - if(m_dev->ResizeTexture(&m_texture, r.width(), r.height())) - { - m_mem.ReadFrame32(r, m_output, !!m_env.STATUS.ISRGB24); - - m_texture->Update(r.rsize(), m_output, m_mem.GetWidth() * sizeof(uint32)); - } - - return m_texture; -} - -void GPURendererSW::Draw() -{ - GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData(); - - shared_ptr data(sd); - - GPUScanlineGlobalData& gd = sd->global; - - const GPUDrawingEnvironment& env = m_env; - - gd.sel.key = 0; - gd.sel.iip = env.PRIM.IIP; - gd.sel.me = env.STATUS.ME; - - if(env.PRIM.ABE) - { - gd.sel.abe = env.PRIM.ABE; - gd.sel.abr = env.STATUS.ABR; - } - - gd.sel.tge = env.PRIM.TGE; - - if(env.PRIM.TME) - { - gd.sel.tme = env.PRIM.TME; - gd.sel.tlu = env.STATUS.TP < 2; - 
gd.sel.twin = (env.TWIN.u32 & 0xfffff) != 0; - gd.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0; - - const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY); - - if(!t) {ASSERT(0); return;} - - gd.tex = t; - - gd.clut = (uint16*)_aligned_malloc(sizeof(uint16) * 256, 32); - - memcpy(gd.clut, m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y), sizeof(uint16) * (env.STATUS.TP == 0 ? 16 : 256)); - - gd.twin = GSVector4i(env.TWIN.TWW, env.TWIN.TWH, env.TWIN.TWX, env.TWIN.TWY); - } - - gd.sel.dtd = m_dither ? env.STATUS.DTD : 0; - gd.sel.md = env.STATUS.MD; - gd.sel.sprite = env.PRIM.TYPE == GPU_SPRITE; - gd.sel.scalex = m_mem.GetScale().x; - - gd.vm = m_mem.GetPixelAddress(0, 0); - - data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x; - data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y; - data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth()); - data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight()); - - data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 32); - data->vertex = (GSVertexSW*)data->buff; - data->vertex_count = m_count; - - memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count); - - data->frame = m_perfmon.GetFrame(); - - int prims = 0; - - switch(env.PRIM.TYPE) - { - case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break; - case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break; - case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break; - default: __assume(0); - } - - // TODO: VertexTrace - - GSVector4 tl(+1e10f); - GSVector4 br(-1e10f); - - GSVertexSW* v = data->vertex; - - for(int i = 0, j = data->vertex_count; i < j; i++) - { - GSVector4 p = v[i].p; - - tl = tl.min(p); - br = br.max(p); - } - - data->bbox = GSVector4i(tl.xyxy(br)); - - GSVector4i r = data->bbox.rintersect(data->scissor); - - r.left 
>>= m_scale.x; - r.top >>= m_scale.y; - r.right >>= m_scale.x; - r.bottom >>= m_scale.y; - - Invalidate(r); - - m_rl->Queue(data); - - m_rl->Sync(); - - m_perfmon.Put(GSPerfMon::Draw, 1); - m_perfmon.Put(GSPerfMon::Prim, prims); - m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels()); -} - -void GPURendererSW::VertexKick() -{ - GSVertexSW& dst = m_vl.AddTail(); - - // TODO: x/y + off.x/y should wrap around at +/-1024 - - int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.x; - int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.y; - - int u = m_v.UV.X; - int v = m_v.UV.Y; - - GSVector4 pt(x, y, u, v); - - dst.p = pt.xyxy(GSVector4::zero()); - dst.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f; - // dst.c = GSVector4(m_v.RGB.u32) * 128.0f; - dst.c = GSVector4(GSVector4i::load((int)m_v.RGB.u32).u8to32() << 7); - - int count = 0; - - if(DrawingKick(count)) - { - // TODO - - m_count += count; - } -} - diff --git a/plugins/GSdx_legacy/GPURendererSW.h b/plugins/GSdx_legacy/GPURendererSW.h deleted file mode 100644 index e340191bef..0000000000 --- a/plugins/GSdx_legacy/GPURendererSW.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPURenderer.h" -#include "GPUDrawScanline.h" - -class GPURendererSW : public GPURendererT -{ -protected: - IRasterizer* m_rl; - GSTexture* m_texture; - uint32* m_output; - - void ResetDevice(); - GSTexture* GetOutput(); - void VertexKick(); - void Draw(); - -public: - GPURendererSW(GSDevice* dev, int threads); - virtual ~GPURendererSW(); -}; diff --git a/plugins/GSdx_legacy/GPUScanlineEnvironment.h b/plugins/GSdx_legacy/GPUScanlineEnvironment.h deleted file mode 100644 index ad9d7fa1fd..0000000000 --- a/plugins/GSdx_legacy/GPUScanlineEnvironment.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSVector.h" -#include "GPULocalMemory.h" - -union GPUScanlineSelector -{ - struct - { - uint32 iip:1; // 0 - uint32 me:1; // 1 - uint32 abe:1; // 2 - uint32 abr:2; // 3 - uint32 tge:1; // 5 - uint32 tme:1; // 6 - uint32 twin:1; // 7 - uint32 tlu:1; // 8 - uint32 dtd:1; // 9 - uint32 ltf:1; // 10 - uint32 md:1; // 11 - uint32 sprite:1; // 12 - uint32 scalex:2; // 13 - }; - - struct - { - uint32 _pad1:1; // 0 - uint32 rfb:2; // 1 - uint32 _pad2:2; // 3 - uint32 tfx:2; // 5 - }; - - uint32 key; - - operator uint32() const {return key;} -}; - -__aligned(struct, 32) GPUScanlineGlobalData -{ - GPUScanlineSelector sel; - - void* vm; - const void* tex; - uint16* clut; - GSVector4i twin; // TWW, TWH, TWX, TWY -}; - -__aligned(struct, 32) GPUScanlineLocalData -{ - const GPUScanlineGlobalData* gd; - - struct {GSVector4i u, v;} twin[3]; - struct {GSVector4i s, t, r, g, b, _pad[3];} d; - struct {GSVector4i st, c;} d8; - - struct {GSVector4i s, t, r, b, g, uf, vf, dither, fd, test;} temp; -}; diff --git a/plugins/GSdx_legacy/GPUSettingsDlg.cpp b/plugins/GSdx_legacy/GPUSettingsDlg.cpp deleted file mode 100644 index bdbcecb9b3..0000000000 --- a/plugins/GSdx_legacy/GPUSettingsDlg.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSUtil.h" -#include "GPUSettingsDlg.h" -#include "resource.h" - -GPUSettingsDlg::GPUSettingsDlg() - : GSDialog(IDD_GPUCONFIG) -{ -} - -void GPUSettingsDlg::OnInit() -{ - __super::OnInit(); - - m_modes.clear(); - - { - D3DDISPLAYMODE mode; - memset(&mode, 0, sizeof(mode)); - m_modes.push_back(mode); - - ComboBoxAppend(IDC_RESOLUTION, "Please select...", (LPARAM)&m_modes.back(), true); - - if(CComPtr d3d = Direct3DCreate9(D3D_SDK_VERSION)) - { - uint32 w = theApp.GetConfig("ModeWidth", 0); - uint32 h = theApp.GetConfig("ModeHeight", 0); - uint32 hz = theApp.GetConfig("ModeRefreshRate", 0); - - uint32 n = d3d->GetAdapterModeCount(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8); - - for(uint32 i = 0; i < n; i++) - { - if(S_OK == d3d->EnumAdapterModes(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8, i, &mode)) - { - m_modes.push_back(mode); - - string str = format("%dx%d %dHz", mode.Width, mode.Height, mode.RefreshRate); - - ComboBoxAppend(IDC_RESOLUTION, str.c_str(), (LPARAM)&m_modes.back(), w == mode.Width && h == mode.Height && hz == mode.RefreshRate); - } - } - } - } - - ComboBoxInit(IDC_RENDERER, theApp.m_gpu_renderers, theApp.GetConfig("Renderer", 0)); - ComboBoxInit(IDC_FILTER, theApp.m_gpu_filter, theApp.GetConfig("filter", 0)); - ComboBoxInit(IDC_DITHERING, theApp.m_gpu_dithering, theApp.GetConfig("dithering", 1)); - ComboBoxInit(IDC_ASPECTRATIO, theApp.m_gpu_aspectratio, theApp.GetConfig("AspectRatio", 1)); - ComboBoxInit(IDC_SCALE, theApp.m_gpu_scale, theApp.GetConfig("scale_x", 0) | (theApp.GetConfig("scale_y", 0) << 2)); - - CheckDlgButton(m_hWnd, IDC_WINDOWED, theApp.GetConfig("windowed", 1)); - - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, 
MAKELPARAM(theApp.GetConfig("extrathreads", DEFAULT_EXTRA_RENDERING_THREADS), 0)); - - UpdateControls(); -} - -bool GPUSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) -{ - if(id == IDC_RENDERER && code == CBN_SELCHANGE) - { - UpdateControls(); - } - else if(id == IDOK) - { - INT_PTR data; - - if(ComboBoxGetSelData(IDC_RESOLUTION, data)) - { - const D3DDISPLAYMODE* mode = (D3DDISPLAYMODE*)data; - - theApp.SetConfig("ModeWidth", (int)mode->Width); - theApp.SetConfig("ModeHeight", (int)mode->Height); - theApp.SetConfig("ModeRefreshRate", (int)mode->RefreshRate); - } - - if(ComboBoxGetSelData(IDC_RENDERER, data)) - { - theApp.SetConfig("Renderer", (int)data); - } - - if(ComboBoxGetSelData(IDC_FILTER, data)) - { - theApp.SetConfig("filter", (int)data); - } - - if(ComboBoxGetSelData(IDC_DITHERING, data)) - { - theApp.SetConfig("dithering", (int)data); - } - - if(ComboBoxGetSelData(IDC_ASPECTRATIO, data)) - { - theApp.SetConfig("AspectRatio", (int)data); - } - - if(ComboBoxGetSelData(IDC_SCALE, data)) - { - theApp.SetConfig("scale_x", data & 3); - theApp.SetConfig("scale_y", (data >> 2) & 3); - } - - theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); - theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED)); - } - - return __super::OnCommand(hWnd, id, code); -} - -void GPUSettingsDlg::UpdateControls() -{ - INT_PTR i; - - if(ComboBoxGetSelData(IDC_RENDERER, i)) - { - bool dx9 = i == 0; - bool dx11 = i == 1; - bool sw = i >= 0 && i <= 2; - - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO9), dx9 ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO11), dx11 ? 
SW_SHOW : SW_HIDE); - - EnableWindow(GetDlgItem(m_hWnd, IDC_SCALE), sw); - EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw); - EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw); - } -} diff --git a/plugins/GSdx_legacy/GPUSettingsDlg.h b/plugins/GSdx_legacy/GPUSettingsDlg.h deleted file mode 100644 index 7af5202a69..0000000000 --- a/plugins/GSdx_legacy/GPUSettingsDlg.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDialog.h" -#include "GSSetting.h" - -class GPUSettingsDlg : public GSDialog -{ - list m_modes; - - void UpdateControls(); - -protected: - void OnInit(); - bool OnCommand(HWND hWnd, UINT id, UINT code); - -public: - GPUSettingsDlg(); -}; diff --git a/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.cpp b/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.cpp deleted file mode 100644 index 5367fc3a0b..0000000000 --- a/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -// TODO: x64 - -#include "stdafx.h" -#include "GPUSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -using namespace Xbyak; - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) - , m_local(*(GPUScanlineLocalData*)param) -{ - m_sel.key = key; - - Generate(); -} - -void GPUSetupPrimCodeGenerator::Generate() -{ - if(m_sel.tme && !m_sel.twin) - { - pcmpeqd(xmm0, xmm0); - - if(m_sel.sprite) - { - // t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - cvttps2dq(xmm1, ptr[ecx + offsetof(GSVertexSW, t)]); - psrld(xmm1, 8); - psrld(xmm0, 31); - psubd(xmm1, xmm0); - - // t = t.ps32(t); - // t = t.upl16(t); - - packssdw(xmm1, xmm1); - punpcklwd(xmm1, xmm1); - - // m_local.twin[2].u = t.xxxx(); - // m_local.twin[2].v = t.yyyy(); - - pshufd(xmm2, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm3, xmm1, _MM_SHUFFLE(1, 1, 1, 1)); - - movdqa(ptr[&m_local.twin[2].u], xmm2); - movdqa(ptr[&m_local.twin[2].v], xmm3); - } - else - { - // TODO: not really needed - - // m_local.twin[2].u = GSVector4i::x00ff(); - // m_local.twin[2].v = GSVector4i::x00ff(); - - psrlw(xmm0, 8); - - movdqa(ptr[&m_local.twin[2].u], xmm0); - movdqa(ptr[&m_local.twin[2].v], xmm0); - } - } - - if(m_sel.tme || m_sel.iip && m_sel.tfx != 3) - { - mov(edx, dword[esp + _dscan]); - - for(int i = 0; i < 3; i++) - { - movaps(Xmm(5 + i), ptr[&m_shift[i]]); - } - - // GSVector4 dt = dscan.t; - // GSVector4 dc = dscan.c; - - movaps(xmm4, ptr[edx + offsetof(GSVertexSW, c)]); - movaps(xmm3, ptr[edx + offsetof(GSVertexSW, t)]); - - // GSVector4i dtc8 = GSVector4i(dt * 
8.0f).ps32(GSVector4i(dc * 8.0f)); - - movaps(xmm1, xmm3); - mulps(xmm1, xmm5); - cvttps2dq(xmm1, xmm1); - movaps(xmm2, xmm4); - mulps(xmm2, xmm5); - cvttps2dq(xmm2, xmm2); - packssdw(xmm1, xmm2); - - if(m_sel.tme) - { - // m_local.d8.st = dtc8.upl16(dtc8); - - movdqa(xmm0, xmm1); - punpcklwd(xmm0, xmm0); - movdqa(ptr[&m_local.d8.st], xmm0); - } - - if(m_sel.iip && m_sel.tfx != 3) - { - // m_local.d8.c = dtc8.uph16(dtc8); - - punpckhwd(xmm1, xmm1); - movdqa(ptr[&m_local.d8.c], xmm1); - } - - // xmm3 = dt - // xmm4 = dc - // xmm6 = ps0123 - // xmm7 = ps4567 - // xmm0, xmm1, xmm2, xmm5 = free - - if(m_sel.tme) - { - // GSVector4 dtx = dt.xxxx(); - // GSVector4 dty = dt.yyyy(); - - movaps(xmm0, xmm3); - shufps(xmm3, xmm3, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - - // m_local.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567)); - - movaps(xmm1, xmm3); - mulps(xmm3, xmm6); - mulps(xmm1, xmm7); - cvttps2dq(xmm3, xmm3); - cvttps2dq(xmm1, xmm1); - packssdw(xmm3, xmm1); - movdqa(ptr[&m_local.d.s], xmm3); - - // m_local.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567)); - - movaps(xmm1, xmm0); - mulps(xmm0, xmm6); - mulps(xmm1, xmm7); - cvttps2dq(xmm0, xmm0); - cvttps2dq(xmm1, xmm1); - packssdw(xmm0, xmm1); - movdqa(ptr[&m_local.d.t], xmm0); - } - - // xmm4 = dc - // xmm6 = ps0123 - // xmm7 = ps4567 - // xmm0, xmm1, zmm2, xmm3, xmm5 = free - - if(m_sel.iip && m_sel.tfx != 3) - { - // GSVector4 dcx = dc.xxxx(); - // GSVector4 dcy = dc.yyyy(); - // GSVector4 dcz = dc.zzzz(); - - movaps(xmm0, xmm4); - movaps(xmm1, xmm4); - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567)); - - movaps(xmm2, xmm4); - mulps(xmm4, xmm6); - mulps(xmm2, xmm7); - cvttps2dq(xmm4, xmm4); - cvttps2dq(xmm2, xmm2); - packssdw(xmm4, xmm2); - movdqa(ptr[&m_local.d.r], xmm4); - - // 
m_local.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567)); - - movaps(xmm2, xmm0); - mulps(xmm0, xmm6); - mulps(xmm2, xmm7); - cvttps2dq(xmm0, xmm0); - cvttps2dq(xmm2, xmm2); - packssdw(xmm0, xmm2); - movdqa(ptr[&m_local.d.g], xmm0); - - // m_local.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567)); - - movaps(xmm2, xmm1); - mulps(xmm1, xmm6); - mulps(xmm2, xmm7); - cvttps2dq(xmm1, xmm1); - cvttps2dq(xmm2, xmm2); - packssdw(xmm1, xmm2); - movdqa(ptr[&m_local.d.b], xmm1); - } - } - - ret(); -} - -const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] = -{ - GSVector4(8.0f, 8.0f, 8.0f, 8.0f), - GSVector4(0.0f, 1.0f, 2.0f, 3.0f), - GSVector4(4.0f, 5.0f, 6.0f, 7.0f), -}; diff --git a/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.h b/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.h deleted file mode 100644 index 938c8b4736..0000000000 --- a/plugins/GSdx_legacy/GPUSetupPrimCodeGenerator.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPUScanlineEnvironment.h" -#include "GSFunctionMap.h" - -class GPUSetupPrimCodeGenerator : public GSCodeGenerator -{ - void operator = (const GPUSetupPrimCodeGenerator&); - - GPUScanlineSelector m_sel; - GPUScanlineLocalData& m_local; - - void Generate(); - -public: - GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); - - static const GSVector4 m_shift[3]; -}; \ No newline at end of file diff --git a/plugins/GSdx_legacy/GPUState.cpp b/plugins/GSdx_legacy/GPUState.cpp deleted file mode 100644 index 90feb5adf8..0000000000 --- a/plugins/GSdx_legacy/GPUState.cpp +++ /dev/null @@ -1,809 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GPUState.h" - -GPUState::GPUState() - : s_n(0) -{ - memset(m_status, 0, sizeof(m_status)); - - for(size_t i = 0; i < countof(m_fpGPUStatusCommandHandlers); i++) - { - m_fpGPUStatusCommandHandlers[i] = &GPUState::SCH_Null; - } - - m_fpGPUStatusCommandHandlers[0x00] = &GPUState::SCH_ResetGPU; - m_fpGPUStatusCommandHandlers[0x01] = &GPUState::SCH_ResetCommandBuffer; - m_fpGPUStatusCommandHandlers[0x02] = &GPUState::SCH_ResetIRQ; - m_fpGPUStatusCommandHandlers[0x03] = &GPUState::SCH_DisplayEnable; - m_fpGPUStatusCommandHandlers[0x04] = &GPUState::SCH_DMASetup; - m_fpGPUStatusCommandHandlers[0x05] = &GPUState::SCH_StartOfDisplayArea; - m_fpGPUStatusCommandHandlers[0x06] = &GPUState::SCH_HorizontalDisplayRange; - m_fpGPUStatusCommandHandlers[0x07] = &GPUState::SCH_VerticalDisplayRange; - m_fpGPUStatusCommandHandlers[0x08] = &GPUState::SCH_DisplayMode; - m_fpGPUStatusCommandHandlers[0x10] = &GPUState::SCH_GPUInfo; - - m_fpGPUPacketHandler[0] = &GPUState::PH_Command; - m_fpGPUPacketHandler[1] = &GPUState::PH_Polygon; - m_fpGPUPacketHandler[2] = &GPUState::PH_Line; - m_fpGPUPacketHandler[3] = &GPUState::PH_Sprite; - m_fpGPUPacketHandler[4] = &GPUState::PH_Move; - m_fpGPUPacketHandler[5] = &GPUState::PH_Write; - m_fpGPUPacketHandler[6] = &GPUState::PH_Read; - m_fpGPUPacketHandler[7] = &GPUState::PH_Environment; - - Reset(); -} - -GPUState::~GPUState() -{ -} - -void GPUState::Reset() -{ - m_env.Reset(); - - m_mem.Invalidate(GSVector4i(0, 0, 1024, 512)); - - memset(&m_v, 0, sizeof(m_v)); -} - -void GPUState::Flush() -{ - FlushPrim(); -} - -void GPUState::SetPrim(GPUReg* r) -{ - if(m_env.PRIM.TYPE != r->PRIM.TYPE) - { - ResetPrim(); - } - - GPURegPRIM PRIM = r->PRIM; - - PRIM.VTX = 0; - - switch(r->PRIM.TYPE) - { - case GPU_POLYGON: - PRIM.u32 = (r->PRIM.u32 & 0xF7000000) | 3; // TYPE IIP TME ABE TGE - break; - case GPU_LINE: - PRIM.u32 = (r->PRIM.u32 & 0xF2000000) | 2; // TYPE IIP ABE - 
PRIM.TGE = 1; // ? - break; - case GPU_SPRITE: - PRIM.u32 = (r->PRIM.u32 & 0xE7000000) | 2; // TYPE TME ABE TGE - break; - } - - if(m_env.PRIM.u32 != PRIM.u32) - { - Flush(); - - m_env.PRIM = PRIM; - } -} - -void GPUState::SetCLUT(GPUReg* r) -{ - uint32 mask = 0xFFFF0000; // X Y - - uint32 value = (m_env.CLUT.u32 & ~mask) | (r->u32 & mask); - - if(m_env.CLUT.u32 != value) - { - Flush(); - - m_env.CLUT.u32 = value; - } -} - -void GPUState::SetTPAGE(GPUReg* r) -{ - uint32 mask = 0x000001FF; // TP ABR TY TX - - uint32 value = (m_env.STATUS.u32 & ~mask) | ((r->u32 >> 16) & mask); - - if(m_env.STATUS.u32 != value) - { - Flush(); - - m_env.STATUS.u32 = value; - } -} - -void GPUState::Invalidate(const GSVector4i& r) -{ - m_mem.Invalidate(r); -} - -void GPUState::WriteData(const uint8* mem, uint32 size) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - size <<= 2; - - m_write.Append(mem, size); - - int i = 0; - - while(i < m_write.bytes) - { - GPUReg* r = (GPUReg*)&m_write.buff[i]; - - int ret = (this->*m_fpGPUPacketHandler[r->PACKET.TYPE])(r, (m_write.bytes - i) >> 2); - - if(ret == 0) return; // need more data - - i += ret << 2; - } - - m_write.Remove(i); -} - -void GPUState::ReadData(uint8* mem, uint32 size) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - int remaining = m_read.bytes - m_read.cur; - - int bytes = (int)size << 2; - - if(bytes > remaining) - { - // ASSERT(0); - - // printf"WARNING: ReadData\n"); - - // memset(&mem[remaining], 0, bytes - remaining); - - bytes = remaining; - } - - memcpy(mem, &m_read.buff[m_read.cur], bytes); - - m_read.cur += bytes; - - if(m_read.cur >= m_read.bytes) - { - m_env.STATUS.IMG = 0; - } -} - -void GPUState::WriteStatus(uint32 status) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - uint32 b = status >> 24; - - m_status[b] = status; - - (this->*m_fpGPUStatusCommandHandlers[b])((GPUReg*)&status); -} - -uint32 GPUState::ReadStatus() -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ? 
- - return m_env.STATUS.u32; -} - -void GPUState::Freeze(GPUFreezeData* data) -{ - data->status = m_env.STATUS.u32; - memcpy(data->control, m_status, 256 * 4); - m_mem.ReadRect(GSVector4i(0, 0, 1024, 512), data->vram); -} - -void GPUState::Defrost(const GPUFreezeData* data) -{ - m_env.STATUS.u32 = data->status; - memcpy(m_status, data->control, 256 * 4); - m_mem.WriteRect(GSVector4i(0, 0, 1024, 512), data->vram); - - for(int i = 0; i <= 8; i++) - { - WriteStatus(m_status[i]); - } -} - -void GPUState::SCH_Null(GPUReg* r) -{ - ASSERT(0); -} - -void GPUState::SCH_ResetGPU(GPUReg* r) -{ - Reset(); -} - -void GPUState::SCH_ResetCommandBuffer(GPUReg* r) -{ - // ? -} - -void GPUState::SCH_ResetIRQ(GPUReg* r) -{ - // ? -} - -void GPUState::SCH_DisplayEnable(GPUReg* r) -{ - m_env.STATUS.DEN = r->DEN.DEN; -} - -void GPUState::SCH_DMASetup(GPUReg* r) -{ - m_env.STATUS.DMA = r->DMA.DMA; -} - -void GPUState::SCH_StartOfDisplayArea(GPUReg* r) -{ - m_env.DAREA = r->DAREA; -} - -void GPUState::SCH_HorizontalDisplayRange(GPUReg* r) -{ - m_env.DHRANGE = r->DHRANGE; -} - -void GPUState::SCH_VerticalDisplayRange(GPUReg* r) -{ - m_env.DVRANGE = r->DVRANGE; -} - -void GPUState::SCH_DisplayMode(GPUReg* r) -{ - m_env.STATUS.WIDTH0 = r->DMODE.WIDTH0; - m_env.STATUS.HEIGHT = r->DMODE.HEIGHT; - m_env.STATUS.ISPAL = r->DMODE.ISPAL; - m_env.STATUS.ISRGB24 = r->DMODE.ISRGB24; - m_env.STATUS.ISINTER = r->DMODE.ISINTER; - m_env.STATUS.WIDTH1 = r->DMODE.WIDTH1; -} - -void GPUState::SCH_GPUInfo(GPUReg* r) -{ - uint32 value = 0; - - switch(r->GPUINFO.PARAM) - { - case 0x2: - value = m_env.TWIN.u32; - break; - case 0x0: - case 0x1: - case 0x3: - value = m_env.DRAREATL.u32; - break; - case 0x4: - value = m_env.DRAREABR.u32; - break; - case 0x5: - case 0x6: - value = m_env.DROFF.u32; - break; - case 0x7: - value = 2; - break; - case 0x8: - case 0xf: - value = 0xBFC03720; // ? 
- break; - default: - ASSERT(0); - break; - } - - m_read.RemoveAll(); - m_read.Append((uint8*)&value, 4); - m_read.cur = 0; -} - -int GPUState::PH_Command(GPUReg* r, int size) -{ - switch(r->PACKET.OPTION) - { - case 0: // ??? - - return 1; - - case 1: // clear cache - - return 1; - - case 2: // fillrect - - if(size < 3) return 0; - - Flush(); - - GSVector4i r2; - - r2.left = r[1].XY.X; - r2.top = r[1].XY.Y; - r2.right = r2.left + r[2].XY.X; - r2.bottom = r2.top + r[2].XY.Y; - - uint16 c = (uint16)(((r[0].RGB.R >> 3) << 10) | ((r[0].RGB.R >> 3) << 5) | (r[0].RGB.R >> 3)); - - m_mem.FillRect(r2, c); - - Invalidate(r2); - - Dump("f"); - - return 3; - } - - ASSERT(0); - - return 1; -} - -int GPUState::PH_Polygon(GPUReg* r, int size) -{ - int required = 1; - - int vertices = r[0].POLYGON.VTX ? 4 : 3; - - required += vertices; - - if(r[0].POLYGON.TME) required += vertices; - - if(r[0].POLYGON.IIP) required += vertices - 1; - - if(size < required) return 0; - - // - - SetPrim(r); - - if(r[0].POLYGON.TME) - { - SetCLUT(&r[2]); - - SetTPAGE(&r[r[0].POLYGON.IIP ? 5 : 4]); - } - - // - - GPUVertex v[4]; - - for(int i = 0, j = 0; j < vertices; j++) - { - v[j].RGB = r[r[0].POLYGON.IIP ? 
i : 0].RGB; - - if(j == 0 || r[0].POLYGON.IIP) i++; - - v[j].XY = r[i++].XY; - - if(r[0].POLYGON.TME) - { - v[j].UV.X = r[i].UV.U; - v[j].UV.Y = r[i].UV.V; - - i++; - } - } - - for(int i = 0; i <= vertices - 3; i++) - { - // TODO: sse - - int y0 = v[i + 0].XY.Y; - int y1 = v[i + 1].XY.Y; - int y2 = v[i + 2].XY.Y; - - if(std::abs(y0 - y1) >= 512 - || std::abs(y0 - y2) >= 512 - || std::abs(y1 - y2) >= 512) - { - continue; - } - - int x0 = v[i + 0].XY.X; - int x1 = v[i + 1].XY.X; - int x2 = v[i + 2].XY.X; - - if(std::abs(x0 - x1) >= 1024 - || std::abs(x0 - x2) >= 1024 - || std::abs(x1 - x2) >= 1024) - { - continue; - } - - // - - for(int j = 0; j < 3; j++) - { - m_v = v[i + j]; - - VertexKick(); - } - } - - // - - return required; -} - -int GPUState::PH_Line(GPUReg* r, int size) -{ - int required = 1; - - int vertices = 0; - - if(r->LINE.PLL) - { - required++; - - for(int i = 1; i < size; i++) - { - if((r[i].u32 & 0xf000f000) == 0x50005000) - { - vertices = i - 1; - } - } - - if(vertices < 2) - { - return 0; - } - } - else - { - vertices = 2; - } - - required += vertices; - - if(r->LINE.IIP) required += vertices - 1; - - // - - SetPrim(r); - - // - - for(int i = 0, j = 0; j < vertices; j++) - { - if(j >= 2) VertexKick(); - - m_v.RGB = r[r[0].LINE.IIP ? 
i : 0].RGB; - - if(j == 0 || r[0].LINE.IIP) i++; - - m_v.XY = r[i++].XY; - - VertexKick(); - } - - // - - return required; -} - -int GPUState::PH_Sprite(GPUReg* r, int size) -{ - int required = 2; - - if(r[0].SPRITE.TME) required++; - if(r[0].SPRITE.SIZE == 0) required++; - - if(size < required) return 0; - - // - - SetPrim(r); - - if(r[0].SPRITE.TME) - { - SetCLUT(&r[2]); - } - - // - - int i = 0; - - m_v.RGB = r[i++].RGB; - - m_v.XY = r[i++].XY; - - if(r[0].SPRITE.TME) - { - m_v.UV.X = r[i].UV.U; - m_v.UV.Y = r[i].UV.V; - - i++; - } - - VertexKick(); - - int w = 0; - int h = 0; - - switch(r[0].SPRITE.SIZE) - { - case 0: w = r[i].XY.X; h = r[i].XY.Y; i++; break; - case 1: w = h = 1; break; - case 2: w = h = 8; break; - case 3: w = h = 16; break; - default: __assume(0); - } - - m_v.XY.X += w; - m_v.XY.Y += h; - - if(r[0].SPRITE.TME) - { - m_v.UV.X += w; - m_v.UV.Y += h; - } - - VertexKick(); - - // - - return required; -} - -int GPUState::PH_Move(GPUReg* r, int size) -{ - if(size < 4) return 0; - - Flush(); - - int sx = r[1].XY.X; - int sy = r[1].XY.Y; - - int dx = r[2].XY.X; - int dy = r[2].XY.Y; - - int w = r[3].XY.X; - int h = r[3].XY.Y; - - m_mem.MoveRect(sx, sy, dx, dy, w, h); - - Invalidate(GSVector4i(dx, dy, dx + w, dy + h)); - - // Dump("m"); - - return 4; -} - -int GPUState::PH_Write(GPUReg* r, int size) -{ - if(size < 3) return 0; - - int w = r[2].XY.X; - int h = r[2].XY.Y; - - int required = 3 + ((w * h + 1) >> 1); - - if(size < required) return 0; - - Flush(); - - GSVector4i r2; - - r2.left = r[1].XY.X; - r2.top = r[1].XY.Y; - r2.right = r2.left + w; - r2.bottom = r2.top + h; - - m_mem.WriteRect(r2, (const uint16*)&r[3]); - - Invalidate(r2); - - Dump("w"); - - m_perfmon.Put(GSPerfMon::Swizzle, w * h * 2); - - return required; -} - -int GPUState::PH_Read(GPUReg* r, int size) -{ - if(size < 3) return 0; - - Flush(); - - int w = r[2].XY.X; - int h = r[2].XY.Y; - - if(w > 0 && h > 0) - { - GSVector4i r2; - - r2.left = r[1].XY.X; - r2.top = r[1].XY.Y; - 
r2.right = r2.left + w; - r2.bottom = r2.top + h; - - m_read.bytes = ((w * h + 1) & ~1) * 2; - m_read.cur = 0; - m_read.Reserve(m_read.bytes); - - m_mem.ReadRect(r2, (uint16*)m_read.buff); - - Dump("r"); - } - - m_env.STATUS.IMG = 1; - - return 3; -} - -int GPUState::PH_Environment(GPUReg* r, int size) -{ - switch(r->PACKET.OPTION) - { - case 1: // draw mode setting - - if(((m_env.STATUS.u32 ^ r->MODE.u32) & 0x7ff) != 0) - { - Flush(); - - m_env.STATUS.TX = r->MODE.TX; - m_env.STATUS.TY = r->MODE.TY; - m_env.STATUS.ABR = r->MODE.ABR; - m_env.STATUS.TP = r->MODE.TP; - m_env.STATUS.DTD = r->MODE.DTD; - m_env.STATUS.DFE = r->MODE.DFE; - } - - return 1; - - case 2: // texture window setting - - if(((m_env.TWIN.u32 ^ r->TWIN.u32) & 0xfffff) != 0) - { - Flush(); - - m_env.TWIN = r->TWIN; - } - - return 1; - - case 3: // set drawing area top left - - if(((m_env.DRAREATL.u32 ^ r->DRAREA.u32) & 0xfffff) != 0) - { - Flush(); - - m_env.DRAREATL = r->DRAREA; - } - - return 1; - - case 4: // set drawing area bottom right - - if(((m_env.DRAREABR.u32 ^ r->DRAREA.u32) & 0xfffff) != 0) - { - Flush(); - - m_env.DRAREABR = r->DRAREA; - } - - return 1; - - case 5: // drawing offset - - if(((m_env.DROFF.u32 ^ r->DROFF.u32) & 0x3fffff) != 0) - { - Flush(); - - m_env.DROFF = r->DROFF; - } - - return 1; - - case 6: // mask setting - - if(m_env.STATUS.MD != r->MASK.MD || m_env.STATUS.ME != r->MASK.ME) - { - Flush(); - - m_env.STATUS.MD = r->MASK.MD; - m_env.STATUS.ME = r->MASK.ME; - } - - return 1; - } - - ASSERT(0); - - return 1; -} - -// - -GPUState::Buffer::Buffer() -{ - bytes = 0; - maxbytes = 4096; - buff = (uint8*)_aligned_malloc(maxbytes, 32); - cur = 0; -} - -GPUState::Buffer::~Buffer() -{ - _aligned_free(buff); -} - -void GPUState::Buffer::Reserve(int size) -{ - if(size > maxbytes) - { - int new_maxbytes = (maxbytes + size + 1023) & ~1023; - uint8* new_buff = (uint8*)_aligned_malloc(new_maxbytes, 32); - - if(buff != NULL) - { - memcpy(new_buff, buff, maxbytes); - 
_aligned_free(buff); - } - - maxbytes = new_maxbytes; - buff = new_buff; - } -} - -void GPUState::Buffer::Append(const uint8* src, int size) -{ - Reserve(bytes + (int)size); - - memcpy(&buff[bytes], src, size); - - bytes += size; -} - -void GPUState::Buffer::Remove(int size) -{ - ASSERT(size <= bytes); - - if(size < bytes) - { - memmove(&buff[0], &buff[size], bytes - size); - - bytes -= size; - } - else - { - bytes = 0; - } - - #ifdef DEBUG - memset(&buff[bytes], 0xff, maxbytes - bytes); - #endif -} - -void GPUState::Buffer::RemoveAll() -{ - bytes = 0; -} diff --git a/plugins/GSdx_legacy/GPUState.h b/plugins/GSdx_legacy/GPUState.h deleted file mode 100644 index c2aeb287e7..0000000000 --- a/plugins/GSdx_legacy/GPUState.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPU.h" -#include "GPUDrawingEnvironment.h" -#include "GPULocalMemory.h" -#include "GPUVertex.h" -#include "GSAlignedClass.h" -#include "GSUtil.h" -#include "GSPerfMon.h" - -class GPUState : public GSAlignedClass<32> -{ - typedef void (GPUState::*GPUStatusCommandHandler)(GPUReg* r); - - GPUStatusCommandHandler m_fpGPUStatusCommandHandlers[256]; - - void SCH_Null(GPUReg* r); - void SCH_ResetGPU(GPUReg* r); - void SCH_ResetCommandBuffer(GPUReg* r); - void SCH_ResetIRQ(GPUReg* r); - void SCH_DisplayEnable(GPUReg* r); - void SCH_DMASetup(GPUReg* r); - void SCH_StartOfDisplayArea(GPUReg* r); - void SCH_HorizontalDisplayRange(GPUReg* r); - void SCH_VerticalDisplayRange(GPUReg* r); - void SCH_DisplayMode(GPUReg* r); - void SCH_GPUInfo(GPUReg* r); - - typedef int (GPUState::*GPUPacketHandler)(GPUReg* r, int size); - - GPUPacketHandler m_fpGPUPacketHandler[8]; - - int PH_Command(GPUReg* r, int size); - int PH_Polygon(GPUReg* r, int size); - int PH_Line(GPUReg* r, int size); - int PH_Sprite(GPUReg* r, int size); - int PH_Move(GPUReg* r, int size); - int PH_Write(GPUReg* r, int size); - int PH_Read(GPUReg* r, int size); - int PH_Environment(GPUReg* r, int size); - - class Buffer - { - public: - int bytes; - int maxbytes; - uint8* buff; - int cur; - - public: - Buffer(); - ~Buffer(); - void Reserve(int size); - void Append(const uint8* src, int size); - void Remove(int size); - void RemoveAll(); - }; - - Buffer m_write; - Buffer m_read; - - void SetPrim(GPUReg* r); - void SetCLUT(GPUReg* r); - void SetTPAGE(GPUReg* r); - -protected: - - int s_n; - - void Dump(const string& s, uint32 TP, const GSVector4i& r, int inc = true) - { - //if(m_perfmon.GetFrame() < 1000) - //if((m_env.TWIN.u32 & 0xfffff) == 0) - //if(!m_env.STATUS.ME && !m_env.STATUS.MD) - return; - - if(inc) s_n++; - - //if(s_n < 86) return; - - int dir = 1; -#ifdef DEBUG - dir = 2; -#endif - string path = format("c:\\temp%d\\%04d_%s.bmp", 
dir, s_n, s.c_str()); - - m_mem.SaveBMP(path, r, TP, m_env.CLUT.X, m_env.CLUT.Y); - } - - void Dump(const string& s, int inc = true) - { - Dump(s, 2, GSVector4i(0, 0, 1024, 512), inc); - } - -public: - GPUDrawingEnvironment m_env; - GPULocalMemory m_mem; - GPUVertex m_v; - GSPerfMon m_perfmon; - uint32 m_status[256]; - -public: - GPUState(); - virtual ~GPUState(); - - virtual void Reset(); - virtual void Flush(); - virtual void FlushPrim() = 0; - virtual void ResetPrim() = 0; - virtual void VertexKick() = 0; - virtual void Invalidate(const GSVector4i& r); - - void WriteData(const uint8* mem, uint32 size); - void ReadData(uint8* mem, uint32 size); - - void WriteStatus(uint32 status); - uint32 ReadStatus(); - - void Freeze(GPUFreezeData* data); - void Defrost(const GPUFreezeData* data); -}; - diff --git a/plugins/GSdx_legacy/GPUVertex.h b/plugins/GSdx_legacy/GPUVertex.h deleted file mode 100644 index 05455a4c42..0000000000 --- a/plugins/GSdx_legacy/GPUVertex.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GPU.h" -#include "GSVector.h" - -#pragma pack(push, 1) - -__aligned(struct, 32) GPUVertex -{ - union - { - struct - { - GPURegRGB RGB; - GPURegXY XY; - GPURegXY UV; - }; - - struct {__m128i m128i;}; - struct {__m128 m128;}; - }; - - GPUVertex() {memset(this, 0, sizeof(*this));} -}; - -struct GPUVertexNull -{ -}; - -#pragma pack(pop) diff --git a/plugins/GSdx_legacy/GS.cpp b/plugins/GSdx_legacy/GS.cpp deleted file mode 100644 index 0eecf951b8..0000000000 --- a/plugins/GSdx_legacy/GS.cpp +++ /dev/null @@ -1,1768 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSUtil.h" -#include "GSRendererSW.h" -#include "GSRendererNull.h" -#include "GSDeviceNull.h" -#include "GSDeviceOGL.h" -#include "GSRendererOGL.h" -#include "GSRendererCL.h" - -#ifdef _WIN32 - -#include "GSRendererDX9.h" -#include "GSRendererDX11.h" -#include "GSDevice9.h" -#include "GSDevice11.h" -#include "GSWndDX.h" -#include "GSWndWGL.h" -#include "GSRendererCS.h" -#include "GSSettingsDlg.h" - -static HRESULT s_hr = E_FAIL; - -#else - -#include "GSWndOGL.h" -#include "GSWndEGL.h" - -#include -#include - -extern bool RunLinuxDialog(); - -#endif - -#define PS2E_LT_GS 0x01 -#define PS2E_GS_VERSION 0x0006 -#define PS2E_X86 0x01 // 32 bit -#define PS2E_X86_64 0x02 // 64 bit - -static GSRenderer* s_gs = NULL; -static void (*s_irq)() = NULL; -static uint8* s_basemem = NULL; -static GSRendererType s_renderer = GSRendererType::Undefined; -static bool s_framelimit = true; -static bool s_vsync = false; -static bool s_exclusive = true; -static const char *s_renderer_name = ""; -static const char *s_renderer_type = ""; -bool gsopen_done = false; // crash guard for GSgetTitleInfo2 and GSKeyEvent (replace with lock?) 
- -EXPORT_C_(uint32) PS2EgetLibType() -{ - return PS2E_LT_GS; -} - -EXPORT_C_(const char*) PS2EgetLibName() -{ - return GSUtil::GetLibName(); -} - -EXPORT_C_(uint32) PS2EgetLibVersion2(uint32 type) -{ - const uint32 revision = 1; - const uint32 build = 0; - - return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (PLUGIN_VERSION << 24); -} - -EXPORT_C_(uint32) PS2EgetCpuPlatform() -{ -#ifdef _M_AMD64 - - return PS2E_X86_64; - -#else - - return PS2E_X86; - -#endif -} - -EXPORT_C GSsetBaseMem(uint8* mem) -{ - s_basemem = mem; - - if(s_gs) - { - s_gs->SetRegsMem(s_basemem); - } -} - -EXPORT_C GSsetSettingsDir(const char* dir) -{ - theApp.SetConfigDir(dir); -} - -EXPORT_C_(int) GSinit() -{ - if(!GSUtil::CheckSSE()) - { - return -1; - } - -#ifdef _WIN32 - - s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); - - if (!GSDeviceDX::LoadD3DCompiler()) - { - return -1; - } -#endif - - return 0; -} - -EXPORT_C GSshutdown() -{ - gsopen_done = false; - - delete s_gs; - - s_gs = NULL; - - s_renderer = GSRendererType::Undefined; - -#ifdef _WIN32 - - if(SUCCEEDED(s_hr)) - { - ::CoUninitialize(); - - s_hr = E_FAIL; - } - - GSDeviceDX::FreeD3DCompiler(); - -#endif -} - -EXPORT_C GSclose() -{ - gsopen_done = false; - - if(s_gs == NULL) return; - - s_gs->ResetDevice(); - - // Opengl requirement: It must be done before the Detach() of - // the context - delete s_gs->m_dev; - - s_gs->m_dev = NULL; - - if (s_gs->m_wnd) - { - s_gs->m_wnd->Detach(); - } -} - -static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int threads = -1) -{ - GSDevice* dev = NULL; - - if(renderer == GSRendererType::Undefined) - { - renderer = static_cast(theApp.GetConfig("Renderer", static_cast(GSRendererType::Default))); - } - - if(threads == -1) - { - threads = theApp.GetConfig("extrathreads", DEFAULT_EXTRA_RENDERING_THREADS); - } - - GSWnd* wnd[2] = { NULL, NULL }; - - try - { - if (s_renderer != renderer) - { - // Emulator has made a render change request, which requires a 
completely - // new s_gs -- if the emu doesn't save/restore the GS state across this - // GSopen call then they'll get corrupted graphics, but that's not my problem. - - delete s_gs; - - s_gs = NULL; - } - - const char* renderer_fullname = ""; - const char* renderer_mode = ""; - - switch (renderer) - { - case GSRendererType::DX9_SW: - case GSRendererType::DX1011_SW: - case GSRendererType::Null_SW: - case GSRendererType::OGL_SW: - renderer_mode = "(Software mode)"; - break; - case GSRendererType::DX9_Null: - case GSRendererType::DX1011_Null: - case GSRendererType::Null_Null: - renderer_mode = "(Null mode)"; - break; - case GSRendererType::DX9_OpenCL: - case GSRendererType::DX1011_OpenCL: - case GSRendererType::Null_OpenCL: - case GSRendererType::OGL_OpenCL: - renderer_mode = "(OpenCL)"; - break; - default: - renderer_mode = "(Hardware mode)"; - break; - } - - switch (renderer) - { - default: -#ifdef _WIN32 - case GSRendererType::DX9_HW: - case GSRendererType::DX9_SW: - case GSRendererType::DX9_Null: - case GSRendererType::DX9_OpenCL: - dev = new GSDevice9(); - s_renderer_name = " D3D9"; - renderer_fullname = "Direct3D9"; - break; - case GSRendererType::DX1011_HW: - case GSRendererType::DX1011_SW: - case GSRendererType::DX1011_Null: - case GSRendererType::DX1011_OpenCL: - dev = new GSDevice11(); - s_renderer_name = " D3D11"; - renderer_fullname = "Direct3D11"; - break; -#endif - case GSRendererType::Null_HW: - case GSRendererType::Null_SW: - case GSRendererType::Null_Null: - case GSRendererType::Null_OpenCL: - dev = new GSDeviceNull(); - s_renderer_name = " Null"; - renderer_fullname = "Null"; - break; - case GSRendererType::OGL_HW: - case GSRendererType::OGL_SW: - case GSRendererType::OGL_OpenCL: - dev = new GSDeviceOGL(); - s_renderer_name = " OGL"; - renderer_fullname = "OpenGL"; - break; - } - - printf("Current Renderer: %s %s\n", renderer_fullname, renderer_mode); - - if (dev == NULL) - { - return -1; - } - - if (s_gs == NULL) - { - switch (renderer) - { - 
default: -#ifdef _WIN32 - case GSRendererType::DX9_HW: - s_gs = (GSRenderer*)new GSRendererDX9(); - s_renderer_type = " HW"; - break; - case GSRendererType::DX1011_HW: - s_gs = (GSRenderer*)new GSRendererDX11(); - s_renderer_type = " HW"; - break; -#endif - case GSRendererType::OGL_HW: - s_gs = (GSRenderer*)new GSRendererOGL(); - s_renderer_type = " HW"; - break; - case GSRendererType::DX9_SW: - case GSRendererType::DX1011_SW: - case GSRendererType::Null_SW: - case GSRendererType::OGL_SW: - s_gs = new GSRendererSW(threads); - s_renderer_type = " SW"; - break; - case GSRendererType::DX9_Null: - case GSRendererType::DX1011_Null: - case GSRendererType::Null_Null: - s_gs = new GSRendererNull(); - s_renderer_type = " Null"; - break; - case GSRendererType::DX9_OpenCL: - case GSRendererType::DX1011_OpenCL: - case GSRendererType::Null_OpenCL: - case GSRendererType::OGL_OpenCL: -#ifdef ENABLE_OPENCL - s_gs = new GSRendererCL(); - s_renderer_type = " OCL"; -#else - printf("GSdx error: OpenCL is disabled\n"); -#endif - break; - } - if (s_gs == NULL) - return -1; - - s_renderer = renderer; - } - - if (s_gs->m_wnd == NULL) - { -#ifdef _WIN32 - switch (renderer) - { - case GSRendererType::OGL_HW: - case GSRendererType::OGL_SW: - case GSRendererType::OGL_OpenCL: - s_gs->m_wnd = new GSWndWGL(); - break; - default: - s_gs->m_wnd = new GSWndDX(); - break; - } -#else -#ifdef EGL_SUPPORTED - wnd[0] = new GSWndEGL(); - wnd[1] = new GSWndOGL(); -#else - wnd[0] = new GSWndOGL(); -#endif -#endif - } - } - catch (std::exception& ex) - { - // Allowing std exceptions to escape the scope of the plugin callstack could - // be problematic, because of differing typeids between DLL and EXE compilations. 
- // ('new' could throw std::alloc) - - printf("GSdx error: Exception caught in GSopen: %s", ex.what()); - - return -1; - } - - s_gs->SetRegsMem(s_basemem); - s_gs->SetIrqCallback(s_irq); - s_gs->SetVSync(s_vsync); - s_gs->SetFrameLimit(s_framelimit); - - if(*dsp == NULL) - { - // old-style API expects us to create and manage our own window: - - int w = theApp.GetConfig("ModeWidth", 0); - int h = theApp.GetConfig("ModeHeight", 0); - -#ifdef __linux__ - for(uint32 i = 0; i < 2; i++) { - try - { - if (wnd[i] == NULL) continue; - - wnd[i]->Create(title, w, h); - s_gs->m_wnd = wnd[i]; - - if (i == 0) delete wnd[1]; - - break; - } - catch (GSDXRecoverableError) - { - wnd[i]->Detach(); - delete wnd[i]; - } - } - if (s_gs->m_wnd == NULL) - { - GSclose(); - - return -1; - } -#endif -#ifdef _WIN32 - if(!s_gs->CreateWnd(title, w, h)) - { - GSclose(); - - return -1; - } -#endif - - s_gs->m_wnd->Show(); - - *dsp = s_gs->m_wnd->GetDisplay(); - } - else - { - s_gs->SetMultithreaded(true); - -#ifdef __linux__ - if (s_gs->m_wnd) { - // A window was already attached to s_gs so we also - // need to restore the window state (Attach) - s_gs->m_wnd->Attach((void*)((uptr*)(dsp)+1), false); - } else { - // No window found, try to attach a GLX win and retry - // with EGL win if failed. - for(uint32 i = 0; i < 2; i++) { - try - { - if (wnd[i] == NULL) continue; - - wnd[i]->Attach((void*)((uptr*)(dsp)+1), false); - s_gs->m_wnd = wnd[i]; - - if (i == 0) delete wnd[1]; - - break; - } - catch (GSDXRecoverableError) - { - wnd[i]->Detach(); - delete wnd[i]; - } - } - } -#endif -#ifdef _WIN32 - try - { - s_gs->m_wnd->Attach(*dsp, false); - } - catch (GSDXRecoverableError) - { - s_gs->m_wnd->Detach(); - delete s_gs->m_wnd; - s_gs->m_wnd = NULL; - } -#endif - if (s_gs->m_wnd == NULL) - { - return -1; - } - } - - if(!s_gs->CreateDevice(dev)) - { - // This probably means the user has DX11 configured with a video card that is only DX9 - // compliant. 
Cound mean drivr issues of some sort also, but to be sure, that's the most - // common cause of device creation errors. :) --air - - GSclose(); - - return -1; - } - - if (renderer == GSRendererType::OGL_HW && theApp.GetConfig("debug_glsl_shader", 0) == 2) { - printf("GSdx: test OpenGL shader. Please wait...\n\n"); - static_cast(s_gs->m_dev)->SelfShaderTest(); - printf("\nGSdx: test OpenGL shader done. It will now exit\n"); - return -1; - } - - return 0; -} - -EXPORT_C_(int) GSopen2(void** dsp, uint32 flags) -{ - static bool stored_toggle_state = false; - bool toggle_state = !!(flags & 4); - - GSRendererType renderer = s_renderer; - // Fresh start up or config file changed - if (renderer == GSRendererType::Undefined) - { -#ifdef _WIN32 - GSRendererType default_renderer = GSUtil::CheckDirect3D11Level() >= D3D_FEATURE_LEVEL_10_0 ? GSRendererType::DX1011_HW : GSRendererType::DX9_HW; -#else - GSRendererType default_renderer = GSRendererType::Default; -#endif - renderer = static_cast(theApp.GetConfig("Renderer", static_cast(default_renderer))); - } - else if (stored_toggle_state != toggle_state) - { -#ifdef _WIN32 - GSRendererType best_sw_renderer = GSUtil::CheckDirect3D11Level() >= D3D_FEATURE_LEVEL_10_0 ? GSRendererType::DX1011_SW : GSRendererType::DX9_SW; - - - switch (renderer) { - // Use alternative renderer (SW if currently using HW renderer, and vice versa, keeping the same API and API version) - case GSRendererType::DX9_SW: renderer = GSRendererType::DX9_HW; break; - case GSRendererType::DX9_HW: renderer = GSRendererType::DX9_SW; break; - case GSRendererType::DX1011_SW: renderer = GSRendererType::DX1011_HW; break; - case GSRendererType::DX1011_HW: renderer = GSRendererType::DX1011_SW; break; - case GSRendererType::OGL_SW: renderer = GSRendererType::OGL_HW; break; - case GSRendererType::OGL_HW: renderer = GSRendererType::OGL_SW; break; - default: renderer = best_sw_renderer; break;// If wasn't using one of the above mentioned ones, use best SW renderer. 
- - } - -#endif -#ifdef __linux__ - switch(renderer) { - // Use alternative renderer (SW if currently using HW renderer, and vice versa) - case GSRendererType::OGL_SW: renderer = GSRendererType::OGL_HW; break; - case GSRendererType::OGL_HW: renderer = GSRendererType::OGL_SW; break; - default: renderer = GSRendererType::OGL_SW; break; // fallback to OGL SW - } -#endif - } - stored_toggle_state = toggle_state; - - int retval = _GSopen(dsp, "", renderer); - - if (s_gs != NULL) - s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios - - gsopen_done = true; - - return retval; -} - -EXPORT_C_(int) GSopen(void** dsp, const char* title, int mt) -{ - /* - if(!XInitThreads()) return -1; - - Display* display = XOpenDisplay(0); - - XCloseDisplay(display); - */ - - GSRendererType renderer = GSRendererType::Default; - - // Legacy GUI expects to acquire vsync from the configuration files. - - s_vsync = !!theApp.GetConfig("vsync", 0); - - if(mt == 2) - { - // pcsx2 sent a switch renderer request - -#ifdef _WIN32 - - renderer = GSUtil::CheckDirect3D11Level() >= D3D_FEATURE_LEVEL_10_0 ? 
GSRendererType::DX1011_SW : GSRendererType::DX9_SW; - -#endif - - mt = 1; - } - else - { - // normal init - - renderer = static_cast(theApp.GetConfig("Renderer", static_cast(GSRendererType::Default))); - } - - *dsp = NULL; - - int retval = _GSopen(dsp, title, renderer); - - if(retval == 0 && s_gs) - { - s_gs->SetMultithreaded(!!mt); - } - - gsopen_done = true; - - return retval; -} - -EXPORT_C GSreset() -{ - try - { - s_gs->Reset(); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSgifSoftReset(uint32 mask) -{ - try - { - s_gs->SoftReset(mask); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSwriteCSR(uint32 csr) -{ - try - { - s_gs->WriteCSR(csr); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSinitReadFIFO(uint8* mem) -{ - GL_PERF("Init Read FIFO1"); - try - { - s_gs->InitReadFIFO(mem, 1); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSreadFIFO(uint8* mem) -{ - try - { - s_gs->ReadFIFO(mem, 1); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSinitReadFIFO2(uint8* mem, uint32 size) -{ - GL_PERF("Init Read FIFO2"); - try - { - s_gs->InitReadFIFO(mem, size); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSreadFIFO2(uint8* mem, uint32 size) -{ - try - { - s_gs->ReadFIFO(mem, size); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSgifTransfer(const uint8* mem, uint32 size) -{ - try - { - s_gs->Transfer<3>(mem, size); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSgifTransfer1(uint8* mem, uint32 addr) -{ - try - { - s_gs->Transfer<0>(const_cast(mem) + addr, (0x4000 - addr) / 16); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSgifTransfer2(uint8* mem, uint32 size) -{ - try - { - s_gs->Transfer<1>(const_cast(mem), size); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSgifTransfer3(uint8* mem, uint32 size) -{ - try - { - s_gs->Transfer<2>(const_cast(mem), size); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C GSvsync(int field) -{ - try - { 
-#ifdef _WIN32 - - if(s_gs->m_wnd->IsManaged()) - { - MSG msg; - - memset(&msg, 0, sizeof(msg)); - - while(msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) - { - TranslateMessage(&msg); - DispatchMessage(&msg); - } - } - -#endif - - s_gs->VSync(field); - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C_(uint32) GSmakeSnapshot(char* path) -{ - try - { - string s(path); - - if(!s.empty() && s[s.length() - 1] != DIRECTORY_SEPARATOR) - { - s = s + DIRECTORY_SEPARATOR; - } - - return s_gs->MakeSnapshot(s + "gsdx"); - } - catch (GSDXRecoverableError) - { - return false; - } -} - -EXPORT_C GSkeyEvent(GSKeyEventData* e) -{ - try - { - if(gsopen_done) - { - s_gs->KeyEvent(e); - } - } - catch (GSDXRecoverableError) - { - } -} - -EXPORT_C_(int) GSfreeze(int mode, GSFreezeData* data) -{ - try - { - if(mode == FREEZE_SAVE) - { - return s_gs->Freeze(data, false); - } - else if(mode == FREEZE_SIZE) - { - return s_gs->Freeze(data, true); - } - else if(mode == FREEZE_LOAD) - { - return s_gs->Defrost(data); - } - } - catch (GSDXRecoverableError) - { - } - - return 0; -} - -EXPORT_C GSconfigure() -{ - try - { - if(!GSUtil::CheckSSE()) return; - -#ifdef _WIN32 - GSDialog::InitCommonControls(); - if(GSSettingsDlg().DoModal() == IDOK) - { - // Force a reload of the gs state - s_renderer = GSRendererType::Undefined; - } - -#else - - if (RunLinuxDialog()) { - theApp.ReloadConfig(); - // Force a reload of the gs state - s_renderer = GSRendererType::Undefined; - } - -#endif - - } catch (GSDXRecoverableError) - { - } -} - -EXPORT_C_(int) GStest() -{ - if(!GSUtil::CheckSSE()) - { - return -1; - } - -#ifdef _WIN32 - - s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); - - if(!GSUtil::CheckDirectX()) - { - if(SUCCEEDED(s_hr)) - { - ::CoUninitialize(); - } - - s_hr = E_FAIL; - - return -1; - } - - if(SUCCEEDED(s_hr)) - { - ::CoUninitialize(); - } - - s_hr = E_FAIL; - -#endif - - return 0; -} - -EXPORT_C GSabout() -{ -} - -EXPORT_C GSirqCallback(void (*irq)()) -{ - 
s_irq = irq; - - if(s_gs) - { - s_gs->SetIrqCallback(s_irq); - } -} - -void pt(const char* str){ - struct tm *current; - time_t now; - - time(&now); - current = localtime(&now); - - printf("%02i:%02i:%02i%s", current->tm_hour, current->tm_min, current->tm_sec, str); -} - -EXPORT_C_(int) GSsetupRecording(int start, void* data) -{ - if (s_gs == NULL) { - printf("GSdx: no s_gs for recording\n"); - return 0; - } -#ifdef __linux__ - if (!theApp.GetConfig("capture_enabled", 0)) { - printf("GSdx: Recording is disabled\n"); - return 0; - } -#endif - - if(start & 1) - { - printf("GSdx: Recording start command\n"); - if (s_gs->BeginCapture()) { - pt(" - Capture started\n"); - } else { - pt(" - Capture cancelled\n"); - return 0; - } - } - else - { - printf("GSdx: Recording end command\n"); - s_gs->EndCapture(); - pt(" - Capture ended\n"); - } - - return 1; -} - -EXPORT_C GSsetGameCRC(uint32 crc, int options) -{ - s_gs->SetGameCRC(crc, options); -} - -EXPORT_C GSgetLastTag(uint32* tag) -{ - s_gs->GetLastTag(tag); -} - -EXPORT_C GSgetTitleInfo2(char* dest, size_t length) -{ - string s = "GSdx"; - s.append(s_renderer_name).append(s_renderer_type); - - // TODO: this gets called from a different thread concurrently with GSOpen (on linux) - if (gsopen_done && s_gs != NULL && s_gs->m_GStitleInfoBuffer[0]) - { - std::lock_guard lock(s_gs->m_pGSsetTitle_Crit); - - s.append(" | ").append(s_gs->m_GStitleInfoBuffer); - - if(s.size() > length - 1) - { - s = s.substr(0, length - 1); - } - } - - strcpy(dest, s.c_str()); -} - -EXPORT_C GSsetFrameSkip(int frameskip) -{ - s_gs->SetFrameSkip(frameskip); -} - -EXPORT_C GSsetVsync(int enabled) -{ - s_vsync = !!enabled; - - if(s_gs) - { - s_gs->SetVSync(s_vsync); - } -} - -EXPORT_C GSsetExclusive(int enabled) -{ - s_exclusive = !!enabled; - - if(s_gs) - { - s_gs->SetVSync(s_vsync); - } -} - -EXPORT_C GSsetFrameLimit(int limit) -{ - s_framelimit = !!limit; - - if(s_gs) - { - s_gs->SetFrameLimit(s_framelimit); - } -} - -#ifdef _WIN32 - -#include 
-#include - -class Console -{ - HANDLE m_console; - string m_title; - -public: - Console::Console(LPCSTR title, bool open) - : m_console(NULL) - , m_title(title) - { - if(open) Open(); - } - - Console::~Console() - { - Close(); - } - - void Console::Open() - { - if(m_console == NULL) - { - CONSOLE_SCREEN_BUFFER_INFO csbiInfo; - - AllocConsole(); - - SetConsoleTitle(m_title.c_str()); - - m_console = GetStdHandle(STD_OUTPUT_HANDLE); - - COORD size; - - size.X = 100; - size.Y = 300; - - SetConsoleScreenBufferSize(m_console, size); - - GetConsoleScreenBufferInfo(m_console, &csbiInfo); - - SMALL_RECT rect; - - rect = csbiInfo.srWindow; - rect.Right = rect.Left + 99; - rect.Bottom = rect.Top + 64; - - SetConsoleWindowInfo(m_console, TRUE, &rect); - - *stdout = *_fdopen(_open_osfhandle((long)m_console, _O_TEXT), "w"); - - setvbuf(stdout, NULL, _IONBF, 0); - } - } - - void Console::Close() - { - if(m_console != NULL) - { - FreeConsole(); - - m_console = NULL; - } - } -}; - -// lpszCmdLine: -// First parameter is the renderer. -// Second parameter is the gs file to load and run. 
- -EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) -{ - GSRendererType renderer = GSRendererType::Undefined; - - { - char* start = lpszCmdLine; - char* end = NULL; - long n = strtol(lpszCmdLine, &end, 10); - if(end > start) {renderer = static_cast(n); lpszCmdLine = end;} - } - - while(*lpszCmdLine == ' ') lpszCmdLine++; - - ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); - - if(FILE* fp = fopen(lpszCmdLine, "rb")) - { - Console console("GSdx", true); - - GSinit(); - - uint8 regs[0x2000]; - GSsetBaseMem(regs); - - s_vsync = !!theApp.GetConfig("vsync", 0); - - HWND hWnd = NULL; - - _GSopen((void**)&hWnd, "", renderer); - - uint32 crc; - fread(&crc, 4, 1, fp); - GSsetGameCRC(crc, 0); - - GSFreezeData fd; - fread(&fd.size, 4, 1, fp); - fd.data = new uint8[fd.size]; - fread(fd.data, fd.size, 1, fp); - GSfreeze(FREEZE_LOAD, &fd); - delete [] fd.data; - - fread(regs, 0x2000, 1, fp); - - long start = ftell(fp); - - GSvsync(1); - - struct Packet {uint8 type, param; uint32 size, addr; vector buff;}; - - list packets; - vector buff; - int type; - - while((type = fgetc(fp)) != EOF) - { - Packet* p = new Packet(); - - p->type = (uint8)type; - - switch(type) - { - case 0: - - p->param = (uint8)fgetc(fp); - - fread(&p->size, 4, 1, fp); - - switch(p->param) - { - case 0: - p->buff.resize(0x4000); - p->addr = 0x4000 - p->size; - fread(&p->buff[p->addr], p->size, 1, fp); - break; - case 1: - case 2: - case 3: - p->buff.resize(p->size); - fread(&p->buff[0], p->size, 1, fp); - break; - } - - break; - - case 1: - - p->param = (uint8)fgetc(fp); - - break; - - case 2: - - fread(&p->size, 4, 1, fp); - - break; - - case 3: - - p->buff.resize(0x2000); - - fread(&p->buff[0], 0x2000, 1, fp); - - break; - } - - packets.push_back(p); - } - - Sleep(100); - - while(IsWindowVisible(hWnd)) - { - for(list::iterator i = packets.begin(); i != packets.end(); i++) - { - Packet* p = *i; - - switch(p->type) - { - case 0: - - switch(p->param) - { - case 0: 
GSgifTransfer1(&p->buff[0], p->addr); break; - case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break; - case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break; - case 3: GSgifTransfer(&p->buff[0], p->size / 16); break; - } - - break; - - case 1: - - GSvsync(p->param); - - break; - - case 2: - - if(buff.size() < p->size) buff.resize(p->size); - - GSreadFIFO2(&buff[0], p->size / 16); - - break; - - case 3: - - memcpy(regs, &p->buff[0], 0x2000); - - break; - } - } - } - - for(list::iterator i = packets.begin(); i != packets.end(); i++) - { - delete *i; - } - - packets.clear(); - - Sleep(100); - - - /* - vector buff; - bool exit = false; - - int round = 0; - - while(!exit) - { - uint32 index; - uint32 size; - uint32 addr; - - int pos; - - switch(fgetc(fp)) - { - case EOF: - fseek(fp, start, 0); - exit = !IsWindowVisible(hWnd); - //exit = ++round == 60; - break; - - case 0: - index = fgetc(fp); - fread(&size, 4, 1, fp); - - switch(index) - { - case 0: - if(buff.size() < 0x4000) buff.resize(0x4000); - addr = 0x4000 - size; - fread(&buff[addr], size, 1, fp); - GSgifTransfer1(&buff[0], addr); - break; - - case 1: - if(buff.size() < size) buff.resize(size); - fread(&buff[0], size, 1, fp); - GSgifTransfer2(&buff[0], size / 16); - break; - - case 2: - if(buff.size() < size) buff.resize(size); - fread(&buff[0], size, 1, fp); - GSgifTransfer3(&buff[0], size / 16); - break; - - case 3: - if(buff.size() < size) buff.resize(size); - fread(&buff[0], size, 1, fp); - GSgifTransfer(&buff[0], size / 16); - break; - } - - break; - - case 1: - GSvsync(fgetc(fp)); - exit = !IsWindowVisible(hWnd); - break; - - case 2: - fread(&size, 4, 1, fp); - if(buff.size() < size) buff.resize(size); - GSreadFIFO2(&buff[0], size / 16); - break; - - case 3: - fread(regs, 0x2000, 1, fp); - break; - } - } - */ - - GSclose(); - GSshutdown(); - - fclose(fp); - } -} - -EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) -{ - ::SetPriorityClass(::GetCurrentProcess(), 
HIGH_PRIORITY_CLASS); - - Console console("GSdx", true); - - if(1) - { - GSLocalMemory* mem = new GSLocalMemory(); - - static struct {int psm; const char* name;} s_format[] = - { - {PSM_PSMCT32, "32"}, - {PSM_PSMCT24, "24"}, - {PSM_PSMCT16, "16"}, - {PSM_PSMCT16S, "16S"}, - {PSM_PSMT8, "8"}, - {PSM_PSMT4, "4"}, - {PSM_PSMT8H, "8H"}, - {PSM_PSMT4HL, "4HL"}, - {PSM_PSMT4HH, "4HH"}, - {PSM_PSMZ32, "32Z"}, - {PSM_PSMZ24, "24Z"}, - {PSM_PSMZ16, "16Z"}, - {PSM_PSMZ16S, "16ZS"}, - }; - - uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); - - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; - - // - - for(int tbw = 5; tbw <= 10; tbw++) - { - int n = 256 << ((10 - tbw) * 2); - - int w = 1 << tbw; - int h = 1 << tbw; - - printf("%d x %d\n\n", w, h); - - for(size_t i = 0; i < countof(s_format); i++) - { - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm]; - - GSLocalMemory::writeImage wi = psm.wi; - GSLocalMemory::readImage ri = psm.ri; - GSLocalMemory::readTexture rtx = psm.rtx; - GSLocalMemory::readTexture rtxP = psm.rtxP; - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = 0; - BITBLTBUF.SBW = w / 64; - BITBLTBUF.SPSM = s_format[i].psm; - BITBLTBUF.DBP = 0; - BITBLTBUF.DBW = w / 64; - BITBLTBUF.DPSM = s_format[i].psm; - - GIFRegTRXPOS TRXPOS; - - TRXPOS.SSAX = 0; - TRXPOS.SSAY = 0; - TRXPOS.DSAX = 0; - TRXPOS.DSAY = 0; - - GIFRegTRXREG TRXREG; - - TRXREG.RRW = w; - TRXREG.RRH = h; - - GSVector4i r(0, 0, w, h); - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = 0; - TEX0.TBW = w / 64; - - GIFRegTEXA TEXA; - - TEXA.TA0 = 0; - TEXA.TA1 = 0x80; - TEXA.AEM = 0; - - int trlen = w * h * psm.trbpp / 8; - int len = w * h * psm.bpp / 8; - - clock_t start, end; - - printf("[%4s] ", s_format[i].name); - - start = clock(); - - for(int j = 0; j < n; j++) - { - int x = 0; - int y = 0; - - (mem->*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - } - - end = clock(); - - printf("%6d %6d | ", (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * 
n / (end - start) / 1000)); - - start = clock(); - - for(int j = 0; j < n; j++) - { - int x = 0; - int y = 0; - - (mem->*ri)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - } - - end = clock(); - - printf("%6d %6d | ", (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - - const GSOffset* off = mem->GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - start = clock(); - - for(int j = 0; j < n; j++) - { - (mem->*rtx)(off, r, ptr, w * 4, TEXA); - } - - end = clock(); - - printf("%6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - - if(psm.pal > 0) - { - start = clock(); - - for(int j = 0; j < n; j++) - { - (mem->*rtxP)(off, r, ptr, w, TEXA); - } - - end = clock(); - - printf("| %6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - } - - printf("\n"); - } - - printf("\n"); - } - - _aligned_free(ptr); - - delete mem; - } - - // - - if(0) - { - GSLocalMemory* mem = new GSLocalMemory(); - - uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); - - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32]; - - GSLocalMemory::writeImage wi = psm.wi; - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.DBP = 0; - BITBLTBUF.DBW = 32; - BITBLTBUF.DPSM = PSM_PSMCT32; - - GIFRegTRXPOS TRXPOS; - - TRXPOS.DSAX = 0; - TRXPOS.DSAY = 1; - - GIFRegTRXREG TRXREG; - - TRXREG.RRW = 256; - TRXREG.RRH = 256; - - int trlen = 256 * 256 * psm.trbpp / 8; - - int x = 0; - int y = 0; - - (mem->*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - - delete mem; - } - - // - - PostQuitMessage(0); -} - -#endif - -#ifdef __linux__ - -#include -#include // ftime(), struct timeb -#include "GSLzma.h" - -inline unsigned long timeGetTime() -{ - timeb t; - ftime(&t); - - return (unsigned long)(t.time*1000 + t.millitm); -} - -// Note -EXPORT_C GSReplay(char* lpszCmdLine, int renderer) 
-{ - GLLoader::in_replayer = true; - - GSRendererType m_renderer; - // Allow to easyly switch between SW/HW renderer -> this effectively removes the ability to select the renderer by function args - m_renderer = static_cast(theApp.GetConfig("Renderer", static_cast(GSRendererType::Default))); - // alternatively: - // m_renderer = static_cast(renderer); - - if (m_renderer != GSRendererType::OGL_HW && m_renderer != GSRendererType::OGL_SW) - { - fprintf(stderr, "wrong renderer selected %d\n", static_cast(m_renderer)); - return; - } - - struct Packet {uint8 type, param; uint32 size, addr; vector buff;}; - - list packets; - vector buff; - vector stats; - stats.clear(); - uint8 regs[0x2000]; - - GSinit(); - - GSsetBaseMem(regs); - - s_vsync = !!theApp.GetConfig("vsync", 0); - - void* hWnd = NULL; - - int err = _GSopen((void**)&hWnd, "", m_renderer); - if (err != 0) { - fprintf(stderr, "Error failed to GSopen\n"); - return; - } - if (s_gs->m_wnd == NULL) return; - - { // Read .gs content - std::string f(lpszCmdLine); -#ifdef LZMA_SUPPORTED - GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0) - ? 
(GSDumpFile*) new GSDumpLzma(lpszCmdLine) - : (GSDumpFile*) new GSDumpRaw(lpszCmdLine); -#else - GSDumpFile* file = new GSDumpRaw(lpszCmdLine); -#endif - - uint32 crc; - file->Read(&crc, 4); - GSsetGameCRC(crc, 0); - - GSFreezeData fd; - file->Read(&fd.size, 4); - fd.data = new uint8[fd.size]; - file->Read(fd.data, fd.size); - - GSfreeze(FREEZE_LOAD, &fd); - delete [] fd.data; - - file->Read(regs, 0x2000); - - GSvsync(1); - - - while(!file->IsEof()) - { - uint8 type; - file->Read(&type, 1); - - Packet* p = new Packet(); - - p->type = type; - - switch(type) - { - case 0: - file->Read(&p->param, 1); - file->Read(&p->size, 4); - - switch(p->param) - { - case 0: - p->buff.resize(0x4000); - p->addr = 0x4000 - p->size; - file->Read(&p->buff[p->addr], p->size); - break; - case 1: - case 2: - case 3: - p->buff.resize(p->size); - file->Read(&p->buff[0], p->size); - break; - } - - break; - - case 1: - file->Read(&p->param, 1); - - break; - - case 2: - file->Read(&p->size, 4); - - break; - - case 3: - p->buff.resize(0x2000); - - file->Read(&p->buff[0], 0x2000); - - break; - } - - packets.push_back(p); - } - - delete file; - } - - sleep(1); - - //while(IsWindowVisible(hWnd)) - //FIXME map? - int finished = theApp.GetConfig("linux_replay", 1); - if (theApp.GetConfig("dump", 0)) { - fprintf(stderr, "Dump is enabled. 
Replay will be disabled\n"); - finished = 1; - } - unsigned long frame_number = 0; - unsigned long total_frame_nb = 0; - while(finished > 0) - { - frame_number = 0; - unsigned long start = timeGetTime(); - for(auto i = packets.begin(); i != packets.end(); i++) - { - Packet* p = *i; - - switch(p->type) - { - case 0: - - switch(p->param) - { - case 0: GSgifTransfer1(&p->buff[0], p->addr); break; - case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break; - case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break; - case 3: GSgifTransfer(&p->buff[0], p->size / 16); break; - } - - break; - - case 1: - - GSvsync(p->param); - frame_number++; - - break; - - case 2: - - if(buff.size() < p->size) buff.resize(p->size); - - GSreadFIFO2(&buff[0], p->size / 16); - - break; - - case 3: - - memcpy(regs, &p->buff[0], 0x2000); - - break; - } - } - - // Ensure the rendering is complete to measure correctly the time. - glFinish(); - - if (finished > 90) { - sleep(1); - } else { - unsigned long end = timeGetTime(); - frame_number = std::max(1ul, frame_number); // avoid a potential division by 0 - - fprintf(stderr, "The %ld frames of the scene was render on %ldms\n", frame_number, end - start); - fprintf(stderr, "A means of %fms by frame\n", (float)(end - start)/(float)frame_number); - - stats.push_back((float)(end - start)); - - finished--; - total_frame_nb += frame_number; - } - } - - if (theApp.GetConfig("linux_replay", 1) > 1) { - // Print some nice stats - // Skip first frame (shader compilation populate the result) - // it divides by 10 the standard deviation... 
- float n = (float)theApp.GetConfig("linux_replay", 1) - 1.0f; - float mean = 0; - float sd = 0; - for (auto i = stats.begin()+1; i != stats.end(); i++) { - mean += *i; - } - mean = mean/n; - for (auto i = stats.begin()+1; i != stats.end(); i++) { - sd += pow((*i)-mean, 2); - } - sd = sqrt(sd/n); - - fprintf(stderr, "\n\nMean: %fms\n", mean); - fprintf(stderr, "Standard deviation: %fms\n", sd); - fprintf(stderr, "Mean by frame: %fms (%ffps)\n", mean/(float)frame_number, 1000.0f*frame_number/mean); - fprintf(stderr, "Standard deviatin by frame: %fms\n", sd/(float)frame_number); - } -#ifdef ENABLE_OGL_DEBUG_MEM_BW - total_frame_nb *= 1024; - fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n", - (float)g_real_texture_upload_byte/(float)total_frame_nb, - (float)g_vertex_upload_byte/(float)total_frame_nb, - (float)g_uniform_upload_byte/(float)total_frame_nb - ); -#endif - - for(auto i = packets.begin(); i != packets.end(); i++) - { - delete *i; - } - - packets.clear(); - - sleep(1); - - GSclose(); - GSshutdown(); -} -#endif - diff --git a/plugins/GSdx_legacy/GS.h b/plugins/GSdx_legacy/GS.h deleted file mode 100644 index b1c7fb5bfd..0000000000 --- a/plugins/GSdx_legacy/GS.h +++ /dev/null @@ -1,1300 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#define PLUGIN_VERSION 0 - -#define VM_SIZE 4194304 -#define PAGE_SIZE 8192 -#define BLOCK_SIZE 256 -#define COLUMN_SIZE 64 - -#define MAX_PAGES (VM_SIZE / PAGE_SIZE) -#define MAX_BLOCKS (VM_SIZE / BLOCK_SIZE) -#define MAX_COLUMNS (VM_SIZE / COLUMN_SIZE) - -//if defined, will send much info in reply to the API title info queri from PCSX2 -//default should be undefined -//#define GSTITLEINFO_API_FORCE_VERBOSE - -#include "GSVector.h" - -#pragma pack(push, 1) - -enum GS_PRIM -{ - GS_POINTLIST = 0, - GS_LINELIST = 1, - GS_LINESTRIP = 2, - GS_TRIANGLELIST = 3, - GS_TRIANGLESTRIP = 4, - GS_TRIANGLEFAN = 5, - GS_SPRITE = 6, - GS_INVALID = 7, -}; - -enum GS_PRIM_CLASS -{ - GS_POINT_CLASS = 0, - GS_LINE_CLASS = 1, - GS_TRIANGLE_CLASS = 2, - GS_SPRITE_CLASS = 3, - GS_INVALID_CLASS = 7, -}; - -enum GIF_REG -{ - GIF_REG_PRIM = 0x00, - GIF_REG_RGBA = 0x01, - GIF_REG_STQ = 0x02, - GIF_REG_UV = 0x03, - GIF_REG_XYZF2 = 0x04, - GIF_REG_XYZ2 = 0x05, - GIF_REG_TEX0_1 = 0x06, - GIF_REG_TEX0_2 = 0x07, - GIF_REG_CLAMP_1 = 0x08, - GIF_REG_CLAMP_2 = 0x09, - GIF_REG_FOG = 0x0a, - GIF_REG_INVALID = 0x0b, - GIF_REG_XYZF3 = 0x0c, - GIF_REG_XYZ3 = 0x0d, - GIF_REG_A_D = 0x0e, - GIF_REG_NOP = 0x0f, -}; - -enum GIF_REG_COMPLEX -{ - GIF_REG_STQRGBAXYZF2 = 0x00, - GIF_REG_STQRGBAXYZ2 = 0x01, -}; - -enum GIF_A_D_REG -{ - GIF_A_D_REG_PRIM = 0x00, - GIF_A_D_REG_RGBAQ = 0x01, - GIF_A_D_REG_ST = 0x02, - GIF_A_D_REG_UV = 0x03, - GIF_A_D_REG_XYZF2 = 0x04, - GIF_A_D_REG_XYZ2 = 0x05, - GIF_A_D_REG_TEX0_1 = 0x06, - GIF_A_D_REG_TEX0_2 = 0x07, - GIF_A_D_REG_CLAMP_1 = 0x08, - GIF_A_D_REG_CLAMP_2 = 0x09, - GIF_A_D_REG_FOG = 0x0a, - GIF_A_D_REG_XYZF3 = 0x0c, - GIF_A_D_REG_XYZ3 = 0x0d, - GIF_A_D_REG_NOP = 0x0f, - GIF_A_D_REG_TEX1_1 = 0x14, - GIF_A_D_REG_TEX1_2 = 0x15, - GIF_A_D_REG_TEX2_1 = 0x16, - GIF_A_D_REG_TEX2_2 = 0x17, 
- GIF_A_D_REG_XYOFFSET_1 = 0x18, - GIF_A_D_REG_XYOFFSET_2 = 0x19, - GIF_A_D_REG_PRMODECONT = 0x1a, - GIF_A_D_REG_PRMODE = 0x1b, - GIF_A_D_REG_TEXCLUT = 0x1c, - GIF_A_D_REG_SCANMSK = 0x22, - GIF_A_D_REG_MIPTBP1_1 = 0x34, - GIF_A_D_REG_MIPTBP1_2 = 0x35, - GIF_A_D_REG_MIPTBP2_1 = 0x36, - GIF_A_D_REG_MIPTBP2_2 = 0x37, - GIF_A_D_REG_TEXA = 0x3b, - GIF_A_D_REG_FOGCOL = 0x3d, - GIF_A_D_REG_TEXFLUSH = 0x3f, - GIF_A_D_REG_SCISSOR_1 = 0x40, - GIF_A_D_REG_SCISSOR_2 = 0x41, - GIF_A_D_REG_ALPHA_1 = 0x42, - GIF_A_D_REG_ALPHA_2 = 0x43, - GIF_A_D_REG_DIMX = 0x44, - GIF_A_D_REG_DTHE = 0x45, - GIF_A_D_REG_COLCLAMP = 0x46, - GIF_A_D_REG_TEST_1 = 0x47, - GIF_A_D_REG_TEST_2 = 0x48, - GIF_A_D_REG_PABE = 0x49, - GIF_A_D_REG_FBA_1 = 0x4a, - GIF_A_D_REG_FBA_2 = 0x4b, - GIF_A_D_REG_FRAME_1 = 0x4c, - GIF_A_D_REG_FRAME_2 = 0x4d, - GIF_A_D_REG_ZBUF_1 = 0x4e, - GIF_A_D_REG_ZBUF_2 = 0x4f, - GIF_A_D_REG_BITBLTBUF = 0x50, - GIF_A_D_REG_TRXPOS = 0x51, - GIF_A_D_REG_TRXREG = 0x52, - GIF_A_D_REG_TRXDIR = 0x53, - GIF_A_D_REG_HWREG = 0x54, - GIF_A_D_REG_SIGNAL = 0x60, - GIF_A_D_REG_FINISH = 0x61, - GIF_A_D_REG_LABEL = 0x62, -}; - -enum GIF_FLG -{ - GIF_FLG_PACKED = 0, - GIF_FLG_REGLIST = 1, - GIF_FLG_IMAGE = 2, - GIF_FLG_IMAGE2 = 3 -}; - -enum GS_PSM -{ - PSM_PSMCT32 = 0, // 0000-0000 - PSM_PSMCT24 = 1, // 0000-0001 - PSM_PSMCT16 = 2, // 0000-0010 - PSM_PSMCT16S = 10, // 0000-1010 - PSM_PSMT8 = 19, // 0001-0011 - PSM_PSMT4 = 20, // 0001-0100 - PSM_PSMT8H = 27, // 0001-1011 - PSM_PSMT4HL = 36, // 0010-0100 - PSM_PSMT4HH = 44, // 0010-1100 - PSM_PSMZ32 = 48, // 0011-0000 - PSM_PSMZ24 = 49, // 0011-0001 - PSM_PSMZ16 = 50, // 0011-0010 - PSM_PSMZ16S = 58, // 0011-1010 -}; - -enum GS_TFX -{ - TFX_MODULATE = 0, - TFX_DECAL = 1, - TFX_HIGHLIGHT = 2, - TFX_HIGHLIGHT2 = 3, - TFX_NONE = 4, -}; - -enum GS_CLAMP -{ - CLAMP_REPEAT = 0, - CLAMP_CLAMP = 1, - CLAMP_REGION_CLAMP = 2, - CLAMP_REGION_REPEAT = 3, -}; - -enum GS_ZTST -{ - ZTST_NEVER = 0, - ZTST_ALWAYS = 1, - ZTST_GEQUAL = 2, - ZTST_GREATER = 3, -}; - -enum 
GS_ATST -{ - ATST_NEVER = 0, - ATST_ALWAYS = 1, - ATST_LESS = 2, - ATST_LEQUAL = 3, - ATST_EQUAL = 4, - ATST_GEQUAL = 5, - ATST_GREATER = 6, - ATST_NOTEQUAL = 7, -}; - -enum GS_AFAIL -{ - AFAIL_KEEP = 0, - AFAIL_FB_ONLY = 1, - AFAIL_ZB_ONLY = 2, - AFAIL_RGB_ONLY = 3, -}; - -enum class GSRendererType : int8_t -{ - Undefined = -1, - - DX9_HW = 0, - DX9_SW = 1, - DX9_OpenCL = 14, - DX9_Null = 2, - - DX1011_HW = 3, - DX1011_SW = 4, - DX1011_OpenCL = 15, - DX1011_Null = 5, - - Null_HW = 9, - Null_SW = 10, - Null_OpenCL = 16, - Null_Null = 11, - - OGL_HW = 12, - OGL_SW = 13, - OGL_OpenCL = 17, - -#ifdef _WIN32 - Default = DX9_HW -#else - // Use ogl renderer as default otherwise it crash at startup - // GSRenderOGL only GSDeviceOGL (not GSDeviceNULL) - Default = OGL_HW -#endif - -}; - - -#define REG32(name) \ -union name \ -{ \ - uint32 u32; \ - struct { \ - -#define REG64(name) \ -union name \ -{ \ - uint64 u64; \ - uint32 u32[2]; \ - void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \ - bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \ - bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \ - operator GSVector4i() const {return GSVector4i::loadl(this);} \ - struct { \ - -#define REG128(name)\ -union name \ -{ \ - uint64 u64[2]; \ - uint32 u32[4]; \ - struct { \ - -#define REG32_(prefix, name) REG32(prefix##name) -#define REG64_(prefix, name) REG64(prefix##name) -#define REG128_(prefix, name) REG128(prefix##name) - -#define REG_END }; }; -#define REG_END2 }; - -#define REG32_SET(name) \ -union name \ -{ \ - uint32 u32; \ - -#define REG64_SET(name) \ -union name \ -{ \ - uint64 u64; \ - uint32 u32[2]; \ - -#define REG128_SET(name)\ -union name \ -{ \ - __m128i m128; \ - uint64 u64[2]; \ - uint32 u32[4]; \ - -#define REG_SET_END }; - -REG64_(GSReg, BGCOLOR) - uint8 R; - uint8 G; - uint8 B; - uint8 _PAD1[5]; -REG_END - -REG64_(GSReg, BUSDIR) - uint32 DIR:1; - uint32 _PAD1:31; - uint32 
_PAD2:32; -REG_END - -REG64_(GSReg, CSR) - uint32 rSIGNAL:1; - uint32 rFINISH:1; - uint32 rHSINT:1; - uint32 rVSINT:1; - uint32 rEDWINT:1; - uint32 rZERO1:1; - uint32 rZERO2:1; - uint32 r_PAD1:1; - uint32 rFLUSH:1; - uint32 rRESET:1; - uint32 r_PAD2:2; - uint32 rNFIELD:1; - uint32 rFIELD:1; - uint32 rFIFO:2; - uint32 rREV:8; - uint32 rID:8; - uint32 wSIGNAL:1; - uint32 wFINISH:1; - uint32 wHSINT:1; - uint32 wVSINT:1; - uint32 wEDWINT:1; - uint32 wZERO1:1; - uint32 wZERO2:1; - uint32 w_PAD1:1; - uint32 wFLUSH:1; - uint32 wRESET:1; - uint32 w_PAD2:2; - uint32 wNFIELD:1; - uint32 wFIELD:1; - uint32 wFIFO:2; - uint32 wREV:8; - uint32 wID:8; -REG_END - -REG64_(GSReg, DISPFB) // (-1/2) - uint32 FBP:9; - uint32 FBW:6; - uint32 PSM:5; - uint32 _PAD:12; - uint32 DBX:11; - uint32 DBY:11; - uint32 _PAD2:10; -REG_END2 - uint32 Block() const {return FBP << 5;} -REG_END2 - -REG64_(GSReg, DISPLAY) // (-1/2) - uint32 DX:12; - uint32 DY:11; - uint32 MAGH:4; - uint32 MAGV:2; - uint32 _PAD:3; - uint32 DW:12; - uint32 DH:11; - uint32 _PAD2:9; -REG_END - -REG64_(GSReg, EXTBUF) - uint32 EXBP:14; - uint32 EXBW:6; - uint32 FBIN:2; - uint32 WFFMD:1; - uint32 EMODA:2; - uint32 EMODC:2; - uint32 _PAD1:5; - uint32 WDX:11; - uint32 WDY:11; - uint32 _PAD2:10; -REG_END - -REG64_(GSReg, EXTDATA) - uint32 SX:12; - uint32 SY:11; - uint32 SMPH:4; - uint32 SMPV:2; - uint32 _PAD1:3; - uint32 WW:12; - uint32 WH:11; - uint32 _PAD2:9; -REG_END - -REG64_(GSReg, EXTWRITE) - uint32 WRITE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GSReg, IMR) - uint32 _PAD1:8; - uint32 SIGMSK:1; - uint32 FINISHMSK:1; - uint32 HSMSK:1; - uint32 VSMSK:1; - uint32 EDWMSK:1; - uint32 _PAD2:19; - uint32 _PAD3:32; -REG_END - -REG64_(GSReg, PMODE) -union -{ - struct - { - uint32 EN1:1; - uint32 EN2:1; - uint32 CRTMD:3; - uint32 MMOD:1; - uint32 AMOD:1; - uint32 SLBG:1; - uint32 ALP:8; - uint32 _PAD:16; - uint32 _PAD1:32; - }; - - struct - { - uint32 EN:2; - uint32 _PAD2:30; - uint32 _PAD3:32; - }; -}; -REG_END - 
-REG64_(GSReg, SIGLBLID) - uint32 SIGID; - uint32 LBLID; -REG_END - -REG64_(GSReg, SMODE1) - uint32 RC:3; - uint32 LC:7; - uint32 T1248:2; - uint32 SLCK:1; - uint32 CMOD:2; - uint32 EX:1; - uint32 PRST:1; - uint32 SINT:1; - uint32 XPCK:1; - uint32 PCK2:2; - uint32 SPML:4; - uint32 GCONT:1; // YCrCb - uint32 PHS:1; - uint32 PVS:1; - uint32 PEHS:1; - uint32 PEVS:1; - uint32 CLKSEL:2; - uint32 NVCK:1; - uint32 SLCK2:1; - uint32 VCKSEL:2; - uint32 VHP:1; - uint32 _PAD1:27; -REG_END - -/* - -// pal - -CLKSEL=1 CMOD=3 EX=0 GCONT=0 LC=32 NVCK=1 PCK2=0 PEHS=0 PEVS=0 PHS=0 PRST=1 PVS=0 RC=4 SINT=0 SLCK=0 SLCK2=1 SPML=4 T1248=1 VCKSEL=1 VHP=0 XPCK=0 - -// ntsc - -CLKSEL=1 CMOD=2 EX=0 GCONT=0 LC=32 NVCK=1 PCK2=0 PEHS=0 PEVS=0 PHS=0 PRST=1 PVS=0 RC=4 SINT=0 SLCK=0 SLCK2=1 SPML=4 T1248=1 VCKSEL=1 VHP=0 XPCK=0 - -// ntsc progressive (SoTC) - -CLKSEL=1 CMOD=0 EX=0 GCONT=0 LC=32 NVCK=1 PCK2=0 PEHS=0 PEVS=0 PHS=0 PRST=1 PVS=0 RC=4 SINT=0 SLCK=0 SLCK2=1 SPML=2 T1248=1 VCKSEL=1 VHP=1 XPCK=0 - -*/ - -REG64_(GSReg, SMODE2) - uint32 INT:1; - uint32 FFMD:1; - uint32 DPMS:2; - uint32 _PAD2:28; - uint32 _PAD3:32; -REG_END - -REG64_(GSReg, SRFSH) - uint32 _DUMMY; - // TODO -REG_END - -REG64_(GSReg, SYNCH1) - uint32 _DUMMY; - // TODO -REG_END - -REG64_(GSReg, SYNCH2) - uint32 _DUMMY; - // TODO -REG_END - -REG64_(GSReg, SYNCV) - uint64 _DUMMY; - // TODO -REG_END - -REG64_SET(GSReg) - GSRegBGCOLOR BGCOLOR; - GSRegBUSDIR BUSDIR; - GSRegCSR CSR; - GSRegDISPFB DISPFB; - GSRegDISPLAY DISPLAY; - GSRegEXTBUF EXTBUF; - GSRegEXTDATA EXTDATA; - GSRegEXTWRITE EXTWRITE; - GSRegIMR IMR; - GSRegPMODE PMODE; - GSRegSIGLBLID SIGLBLID; - GSRegSMODE1 SMODE1; - GSRegSMODE2 SMODE2; -REG_SET_END - -// -// GIFTag - -REG128(GIFTag) - uint32 NLOOP:15; - uint32 EOP:1; - uint32 _PAD1:16; - uint32 _PAD2:14; - uint32 PRE:1; - uint32 PRIM:11; - uint32 FLG:2; // enum GIF_FLG - uint32 NREG:4; - uint64 REGS; -REG_END - -// GIFReg - -REG64_(GIFReg, ALPHA) - uint32 A:2; - uint32 B:2; - uint32 C:2; - uint32 D:2; - uint32 
_PAD1:24; - uint8 FIX; - uint8 _PAD2[3]; -REG_END2 - // opaque => output will be Cs/As - __forceinline bool IsOpaque() const {return ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80);} - __forceinline bool IsOpaque(int amin, int amax) const {return ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80);} - __forceinline bool IsCd() { return (A == B) && (D == 1);} -REG_END2 - -REG64_(GIFReg, BITBLTBUF) - uint32 SBP:14; - uint32 _PAD1:2; - uint32 SBW:6; - uint32 _PAD2:2; - uint32 SPSM:6; - uint32 _PAD3:2; - uint32 DBP:14; - uint32 _PAD4:2; - uint32 DBW:6; - uint32 _PAD5:2; - uint32 DPSM:6; - uint32 _PAD6:2; -REG_END - -REG64_(GIFReg, CLAMP) -union -{ - struct - { - uint32 WMS:2; - uint32 WMT:2; - uint32 MINU:10; - uint32 MAXU:10; - uint32 _PAD1:8; - uint32 _PAD2:2; - uint32 MAXV:10; - uint32 _PAD3:20; - }; - - struct - { - uint64 _PAD4:24; - uint64 MINV:10; - uint64 _PAD5:30; - }; -}; -REG_END - -REG64_(GIFReg, COLCLAMP) - uint32 CLAMP:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, DIMX) - int32 DM00:3; - int32 _PAD00:1; - int32 DM01:3; - int32 _PAD01:1; - int32 DM02:3; - int32 _PAD02:1; - int32 DM03:3; - int32 _PAD03:1; - int32 DM10:3; - int32 _PAD10:1; - int32 DM11:3; - int32 _PAD11:1; - int32 DM12:3; - int32 _PAD12:1; - int32 DM13:3; - int32 _PAD13:1; - int32 DM20:3; - int32 _PAD20:1; - int32 DM21:3; - int32 _PAD21:1; - int32 DM22:3; - int32 _PAD22:1; - int32 DM23:3; - int32 _PAD23:1; - int32 DM30:3; - int32 _PAD30:1; - int32 DM31:3; - int32 _PAD31:1; - int32 DM32:3; - int32 _PAD32:1; - int32 DM33:3; - int32 _PAD33:1; -REG_END - -REG64_(GIFReg, DTHE) - uint32 DTHE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, FBA) - uint32 FBA:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, FINISH) - uint32 _PAD1[2]; -REG_END - -REG64_(GIFReg, FOG) - uint8 _PAD1[7]; - uint8 F; -REG_END - -REG64_(GIFReg, FOGCOL) - uint8 FCR; - uint8 FCG; - uint8 
FCB; - uint8 _PAD1[5]; -REG_END - -REG64_(GIFReg, FRAME) - uint32 FBP:9; - uint32 _PAD1:7; - uint32 FBW:6; - uint32 _PAD2:2; - uint32 PSM:6; - uint32 _PAD3:2; - uint32 FBMSK; -REG_END2 - uint32 Block() const {return FBP << 5;} -REG_END2 - -REG64_(GIFReg, HWREG) - uint32 DATA_LOWER; - uint32 DATA_UPPER; -REG_END - -REG64_(GIFReg, LABEL) - uint32 ID; - uint32 IDMSK; -REG_END - -REG64_(GIFReg, MIPTBP1) - uint64 TBP1:14; - uint64 TBW1:6; - uint64 TBP2:14; - uint64 TBW2:6; - uint64 TBP3:14; - uint64 TBW3:6; - uint64 _PAD:4; -REG_END - -REG64_(GIFReg, MIPTBP2) - uint64 TBP4:14; - uint64 TBW4:6; - uint64 TBP5:14; - uint64 TBW5:6; - uint64 TBP6:14; - uint64 TBW6:6; - uint64 _PAD:4; -REG_END - -REG64_(GIFReg, NOP) - uint32 _PAD[2]; -REG_END - -REG64_(GIFReg, PABE) - uint32 PABE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, PRIM) - uint32 PRIM:3; - uint32 IIP:1; - uint32 TME:1; - uint32 FGE:1; - uint32 ABE:1; - uint32 AA1:1; - uint32 FST:1; - uint32 CTXT:1; - uint32 FIX:1; - uint32 _PAD1:21; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, PRMODE) - uint32 _PRIM:3; - uint32 IIP:1; - uint32 TME:1; - uint32 FGE:1; - uint32 ABE:1; - uint32 AA1:1; - uint32 FST:1; - uint32 CTXT:1; - uint32 FIX:1; - uint32 _PAD2:21; - uint32 _PAD3:32; -REG_END - -REG64_(GIFReg, PRMODECONT) - uint32 AC:1; - uint32 _PAD1:31; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, RGBAQ) - uint8 R; - uint8 G; - uint8 B; - uint8 A; - float Q; -REG_END - -REG64_(GIFReg, SCANMSK) - uint32 MSK:2; - uint32 _PAD1:30; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, SCISSOR) - uint32 SCAX0:11; - uint32 _PAD1:5; - uint32 SCAX1:11; - uint32 _PAD2:5; - uint32 SCAY0:11; - uint32 _PAD3:5; - uint32 SCAY1:11; - uint32 _PAD4:5; -REG_END - -REG64_(GIFReg, SIGNAL) - uint32 ID; - uint32 IDMSK; -REG_END - -REG64_(GIFReg, ST) - float S; - float T; -REG_END - -REG64_(GIFReg, TEST) - uint32 ATE:1; - uint32 ATST:3; - uint32 AREF:8; - uint32 AFAIL:2; - uint32 DATE:1; - uint32 DATM:1; - uint32 ZTE:1; - uint32 ZTST:2; - 
uint32 _PAD1:13; - uint32 _PAD2:32; -REG_END2 - __forceinline bool DoFirstPass() const {return !ATE || ATST != ATST_NEVER;} // not all pixels fail automatically - __forceinline bool DoSecondPass() const {return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP;} // pixels may fail, write fb/z - __forceinline bool NoSecondPass() const {return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP;} // pixels may fail, no output -REG_END2 - -REG64_(GIFReg, TEX0) -union -{ - struct - { - uint32 TBP0:14; - uint32 TBW:6; - uint32 PSM:6; - uint32 TW:4; - uint32 _PAD1:2; - uint32 _PAD2:2; - uint32 TCC:1; - uint32 TFX:2; - uint32 CBP:14; - uint32 CPSM:4; - uint32 CSM:1; - uint32 CSA:5; - uint32 CLD:3; - }; - - struct - { - uint64 _PAD3:30; - uint64 TH:4; - uint64 _PAD4:30; - }; -}; -REG_END2 - __forceinline bool IsRepeating() const - { - if(TBW < 2) - { - if(PSM == PSM_PSMT8) return TW > 7 || TH > 6; - if(PSM == PSM_PSMT4) return TW > 7 || TH > 7; - } - - // The recast of TBW seems useless but it avoid tons of warning from GCC... 
- return ((uint32)TBW << 6u) < (1u << TW); - } -REG_END2 - -REG64_(GIFReg, TEX1) - uint32 LCM:1; - uint32 _PAD1:1; - uint32 MXL:3; - uint32 MMAG:1; - uint32 MMIN:3; - uint32 MTBA:1; - uint32 _PAD2:9; - uint32 L:2; - uint32 _PAD3:11; - int32 K:12; // 1:7:4 - uint32 _PAD4:20; -REG_END2 - bool IsMinLinear() const {return (MMIN == 1) || (MMIN & 4);} - bool IsMagLinear() const {return MMAG;} -REG_END2 - -REG64_(GIFReg, TEX2) - uint32 _PAD1:20; - uint32 PSM:6; - uint32 _PAD2:6; - uint32 _PAD3:5; - uint32 CBP:14; - uint32 CPSM:4; - uint32 CSM:1; - uint32 CSA:5; - uint32 CLD:3; -REG_END - -REG64_(GIFReg, TEXA) - uint8 TA0; - uint8 _PAD1:7; - uint8 AEM:1; - uint16 _PAD2; - uint8 TA1:8; - uint8 _PAD3[3]; -REG_END - -REG64_(GIFReg, TEXCLUT) - uint32 CBW:6; - uint32 COU:6; - uint32 COV:10; - uint32 _PAD1:10; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, TEXFLUSH) - uint32 _PAD1:32; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, TRXDIR) - uint32 XDIR:2; - uint32 _PAD1:30; - uint32 _PAD2:32; -REG_END - -REG64_(GIFReg, TRXPOS) - uint32 SSAX:11; - uint32 _PAD1:5; - uint32 SSAY:11; - uint32 _PAD2:5; - uint32 DSAX:11; - uint32 _PAD3:5; - uint32 DSAY:11; - uint32 DIRY:1; - uint32 DIRX:1; - uint32 _PAD4:3; -REG_END - -REG64_(GIFReg, TRXREG) - uint32 RRW:12; - uint32 _PAD1:20; - uint32 RRH:12; - uint32 _PAD2:20; -REG_END - -// GSState::GIFPackedRegHandlerUV and GSState::GIFRegHandlerUV will make sure that the _PAD1/2 bits are set to zero - -REG64_(GIFReg, UV) - uint16 U; -// uint32 _PAD1:2; - uint16 V; -// uint32 _PAD2:2; - uint32 _PAD3; -REG_END - -// GSState::GIFRegHandlerXYOFFSET will make sure that the _PAD1/2 bits are set to zero - -REG64_(GIFReg, XYOFFSET) - uint32 OFX; // :16; uint32 _PAD1:16; - uint32 OFY; // :16; uint32 _PAD2:16; -REG_END - -REG64_(GIFReg, XYZ) - uint16 X; - uint16 Y; - uint32 Z; -REG_END - -REG64_(GIFReg, XYZF) - uint16 X; - uint16 Y; - uint32 Z:24; - uint32 F:8; -REG_END - -REG64_(GIFReg, ZBUF) - uint32 ZBP:9; - uint32 _PAD1:15; - // uint32 PSM:4; - // uint32 
_PAD2:4; - uint32 PSM:6; - uint32 _PAD2:2; - uint32 ZMSK:1; - uint32 _PAD3:31; -REG_END2 - uint32 Block() const {return ZBP << 5;} -REG_END2 - -REG64_SET(GIFReg) - GIFRegALPHA ALPHA; - GIFRegBITBLTBUF BITBLTBUF; - GIFRegCLAMP CLAMP; - GIFRegCOLCLAMP COLCLAMP; - GIFRegDIMX DIMX; - GIFRegDTHE DTHE; - GIFRegFBA FBA; - GIFRegFINISH FINISH; - GIFRegFOG FOG; - GIFRegFOGCOL FOGCOL; - GIFRegFRAME FRAME; - GIFRegHWREG HWREG; - GIFRegLABEL LABEL; - GIFRegMIPTBP1 MIPTBP1; - GIFRegMIPTBP2 MIPTBP2; - GIFRegNOP NOP; - GIFRegPABE PABE; - GIFRegPRIM PRIM; - GIFRegPRMODE PRMODE; - GIFRegPRMODECONT PRMODECONT; - GIFRegRGBAQ RGBAQ; - GIFRegSCANMSK SCANMSK; - GIFRegSCISSOR SCISSOR; - GIFRegSIGNAL SIGNAL; - GIFRegST ST; - GIFRegTEST TEST; - GIFRegTEX0 TEX0; - GIFRegTEX1 TEX1; - GIFRegTEX2 TEX2; - GIFRegTEXA TEXA; - GIFRegTEXCLUT TEXCLUT; - GIFRegTEXFLUSH TEXFLUSH; - GIFRegTRXDIR TRXDIR; - GIFRegTRXPOS TRXPOS; - GIFRegTRXREG TRXREG; - GIFRegUV UV; - GIFRegXYOFFSET XYOFFSET; - GIFRegXYZ XYZ; - GIFRegXYZF XYZF; - GIFRegZBUF ZBUF; -REG_SET_END - -// GIFPacked - -REG128_(GIFPacked, PRIM) - uint32 PRIM:11; - uint32 _PAD1:21; - uint32 _PAD2[3]; -REG_END - -REG128_(GIFPacked, RGBA) - uint8 R; - uint8 _PAD1[3]; - uint8 G; - uint8 _PAD2[3]; - uint8 B; - uint8 _PAD3[3]; - uint8 A; - uint8 _PAD4[3]; -REG_END - -REG128_(GIFPacked, STQ) - float S; - float T; - float Q; - uint32 _PAD1:32; -REG_END - -REG128_(GIFPacked, UV) - uint32 U:14; - uint32 _PAD1:18; - uint32 V:14; - uint32 _PAD2:18; - uint32 _PAD3:32; - uint32 _PAD4:32; -REG_END - -REG128_(GIFPacked, XYZF2) - uint16 X; - uint16 _PAD1; - uint16 Y; - uint16 _PAD2; - uint32 _PAD3:4; - uint32 Z:24; - uint32 _PAD4:4; - uint32 _PAD5:4; - uint32 F:8; - uint32 _PAD6:3; - uint32 ADC:1; - uint32 _PAD7:16; -REG_END2 - uint32 Skip() const {return u32[3] & 0x8000;} -REG_END2 - -REG128_(GIFPacked, XYZ2) - uint16 X; - uint16 _PAD1; - uint16 Y; - uint16 _PAD2; - uint32 Z; - uint32 _PAD3:15; - uint32 ADC:1; - uint32 _PAD4:16; -REG_END2 - uint32 Skip() const 
{return u32[3] & 0x8000;} -REG_END2 - -REG128_(GIFPacked, FOG) - uint32 _PAD1; - uint32 _PAD2; - uint32 _PAD3; - uint32 _PAD4:4; - uint32 F:8; - uint32 _PAD5:20; -REG_END - -REG128_(GIFPacked, A_D) - uint64 DATA; - uint8 ADDR:8; // enum GIF_A_D_REG - uint8 _PAD1[3+4]; -REG_END - -REG128_(GIFPacked, NOP) - uint32 _PAD1; - uint32 _PAD2; - uint32 _PAD3; - uint32 _PAD4; -REG_END - -REG128_SET(GIFPackedReg) - GIFReg r; - GIFPackedPRIM PRIM; - GIFPackedRGBA RGBA; - GIFPackedSTQ STQ; - GIFPackedUV UV; - GIFPackedXYZF2 XYZF2; - GIFPackedXYZ2 XYZ2; - GIFPackedFOG FOG; - GIFPackedA_D A_D; - GIFPackedNOP NOP; -REG_SET_END - -__aligned(struct, 32) GIFPath -{ - GIFTag tag; - uint32 nloop; - uint32 nreg; - uint32 reg; - uint32 type; - GSVector4i regs; - - enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2, TYPE_STQRGBAXYZ2}; - - __forceinline void SetTag(const void* mem) - { - const GIFTag* RESTRICT src = (const GIFTag*)mem; - - // the compiler has a hard time not reloading every time a field of src is accessed - - uint32 a = src->u32[0]; - uint32 b = src->u32[1]; - - tag.u32[0] = a; - tag.u32[1] = b; - - nloop = a & 0x7fff; - - if(nloop == 0) return; - - GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though - - nreg = (b & 0xf0000000) ? 
(b >> 28) : 16; // src->NREG - regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg); - reg = 0; - - type = TYPE_UNKNOWN; - - if(tag.FLG == GIF_FLG_PACKED) - { - if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) - { - type = TYPE_ADONLY; - } - else - { - switch(nreg) - { - case 1: break; - case 2: break; - case 3: - if(regs.u32[0] == 0x00040102) type = TYPE_STQRGBAXYZF2; // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f) - if(regs.u32[0] == 0x00050102) type = TYPE_STQRGBAXYZ2; // GoW (has other crazy formats, like ...030503050103) - // TODO: common types with UV instead - break; - case 4: break; - case 5: break; - case 6: break; - case 7: break; - case 8: break; - case 9: - if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 3;} // ffx - break; - case 10: break; - case 11: break; - case 12: - if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 4;} // dq8 (not many, mostly 040102) - break; - case 13: break; - case 14: break; - case 15: break; - case 16: break; - default: - __assume(0); - } - } - } - } - - __forceinline uint8 GetReg() const - { - return regs.u8[reg]; - } - - __forceinline uint8 GetReg(uint32 index) const - { - return regs.u8[index]; - } - - __forceinline bool StepReg() - { - if(++reg == nreg) - { - reg = 0; - - if(--nloop == 0) - { - return false; - } - } - - return true; - } -}; - -struct GSPrivRegSet -{ - union - { - struct - { - GSRegPMODE PMODE; - uint64 _pad1; - GSRegSMODE1 SMODE1; - uint64 _pad2; - GSRegSMODE2 SMODE2; - uint64 _pad3; - GSRegSRFSH SRFSH; - uint64 _pad4; - GSRegSYNCH1 SYNCH1; - uint64 _pad5; - GSRegSYNCH2 SYNCH2; - uint64 _pad6; - GSRegSYNCV SYNCV; - uint64 _pad7; - struct { - GSRegDISPFB DISPFB; - uint64 _pad1; - GSRegDISPLAY DISPLAY; - uint64 _pad2; - } DISP[2]; - GSRegEXTBUF EXTBUF; - uint64 
_pad8; - GSRegEXTDATA EXTDATA; - uint64 _pad9; - GSRegEXTWRITE EXTWRITE; - uint64 _pad10; - GSRegBGCOLOR BGCOLOR; - uint64 _pad11; - }; - - uint8 _pad12[0x1000]; - }; - - union - { - struct - { - GSRegCSR CSR; - uint64 _pad13; - GSRegIMR IMR; - uint64 _pad14; - uint64 _unk1[4]; - GSRegBUSDIR BUSDIR; - uint64 _pad15; - uint64 _unk2[6]; - GSRegSIGLBLID SIGLBLID; - uint64 _pad16; - }; - - uint8 _pad17[0x1000]; - }; -}; - -#pragma pack(pop) - -enum {KEYPRESS=1, KEYRELEASE=2}; -struct GSKeyEventData {uint32 key, type;}; - -enum {FREEZE_LOAD=0, FREEZE_SAVE=1, FREEZE_SIZE=2}; -struct GSFreezeData {int size; uint8* data;}; - -enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; - -// default gs config settings -#define DEFAULT_EXTRA_RENDERING_THREADS 2 - -// GS Video modes macros -#define Vmode_VESA_DTV (m_regs->SMODE1.CMOD == 0) -#define Vmode_NTSC (m_regs->SMODE1.CMOD == 2) -#define Vmode_PAL (m_regs->SMODE1.CMOD == 3) -#define Vmode_VESA_1A (m_regs->SMODE1.LC == 15 && Vmode_VESA_DTV) -#define Vmode_VESA_1C (m_regs->SMODE1.LC == 28 && Vmode_VESA_DTV) -#define Vmode_VESA_2B (m_regs->SMODE1.LC == 71 && Vmode_VESA_DTV) -#define Vmode_VESA_2D (m_regs->SMODE1.LC == 44 && Vmode_VESA_DTV) -#define Vmode_VESA_3B (m_regs->SMODE1.LC == 58 && Vmode_VESA_DTV) -#define Vmode_VESA_3D (m_regs->SMODE1.LC == 35 && Vmode_VESA_DTV) -#define Vmode_VESA_4A (m_regs->SMODE1.LC == 8 && Vmode_VESA_DTV) -#define Vmode_VESA_4B (m_regs->SMODE1.LC == 10 && Vmode_VESA_DTV) -#define Vmode_DTV_480P (m_regs->SMODE1.LC == 32 && Vmode_VESA_DTV) -#define Vmode_DTV_720P_1080I (m_regs->SMODE1.LC == 22 && Vmode_VESA_DTV) diff --git a/plugins/GSdx_legacy/GSAlignedClass.cpp b/plugins/GSdx_legacy/GSAlignedClass.cpp deleted file mode 100644 index 6940b5ae8b..0000000000 --- a/plugins/GSdx_legacy/GSAlignedClass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU 
General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSAlignedClass.h" diff --git a/plugins/GSdx_legacy/GSAlignedClass.h b/plugins/GSdx_legacy/GSAlignedClass.h deleted file mode 100644 index 81e83180b0..0000000000 --- a/plugins/GSdx_legacy/GSAlignedClass.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -template class GSAlignedClass -{ -public: - GSAlignedClass() {} - virtual ~GSAlignedClass() {} - - void* operator new (size_t size) - { - return _aligned_malloc(size, i); - } - - void operator delete (void* p) - { - _aligned_free(p); - } - - void* operator new [] (size_t size) - { - return _aligned_malloc(size, i); - } - - void operator delete [] (void* p) - { - _aligned_free(p); - } -}; diff --git a/plugins/GSdx_legacy/GSBlock.cpp b/plugins/GSdx_legacy/GSBlock.cpp deleted file mode 100644 index e010f700c7..0000000000 --- a/plugins/GSdx_legacy/GSBlock.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSBlock.h" - -#if _M_SSE >= 0x501 -const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); -#else -const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); -#endif -const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); -const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); - -#if _M_SSE >= 0x501 -const GSVector8i GSBlock::m_xxxa(0x00008000); -const GSVector8i GSBlock::m_xxbx(0x00007c00); -const GSVector8i GSBlock::m_xgxx(0x000003e0); -const GSVector8i GSBlock::m_rxxx(0x0000001f); -#else -const GSVector4i GSBlock::m_xxxa(0x00008000); -const GSVector4i GSBlock::m_xxbx(0x00007c00); -const GSVector4i GSBlock::m_xgxx(0x000003e0); -const GSVector4i GSBlock::m_rxxx(0x0000001f); -#endif - -const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9); -const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11); -const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13); -const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15); diff --git a/plugins/GSdx_legacy/GSBlock.h b/plugins/GSdx_legacy/GSBlock.h deleted file mode 100644 index a18f597299..0000000000 --- a/plugins/GSdx_legacy/GSBlock.h +++ /dev/null @@ -1,2195 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSTables.h" -#include "GSVector.h" - -class GSBlock -{ - #if _M_SSE >= 0x501 - static const GSVector8i m_r16mask; - #else - static const GSVector4i m_r16mask; - #endif - static const GSVector4i m_r8mask; - static const GSVector4i m_r4mask; - - #if _M_SSE >= 0x501 - static const GSVector8i m_xxxa; - static const GSVector8i m_xxbx; - static const GSVector8i m_xgxx; - static const GSVector8i m_rxxx; - #else - static const GSVector4i m_xxxa; - static const GSVector4i m_xxbx; - static const GSVector4i m_xgxx; - static const GSVector4i m_rxxx; - #endif - - static const GSVector4i m_uw8hmask0; - static const GSVector4i m_uw8hmask1; - static const GSVector4i m_uw8hmask2; - static const GSVector4i m_uw8hmask3; - -public: - template __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - const uint8* RESTRICT s0 = &src[srcpitch * 0]; - const uint8* RESTRICT s1 = &src[srcpitch * 1]; - - #if _M_SSE >= 0x501 - - GSVector8i v0, v1; - - if(alignment == 32) - { - v0 = GSVector8i::load(s0).acbd(); - v1 = GSVector8i::load(s1).acbd(); - - GSVector8i::sw64(v0, v1); - } - else - { - if(alignment == 16) - { - v0 = GSVector8i::load(&s0[0], &s0[16]).acbd(); - v1 = GSVector8i::load(&s1[0], &s1[16]).acbd(); - - GSVector8i::sw64(v0, v1); - } - else - { - //v0 = GSVector8i::load(&s0[0], &s0[16], &s0[8], &s0[24]); - //v1 = GSVector8i::load(&s1[0], &s1[16], 
&s1[8], &s1[24]); - - GSVector4i v4 = GSVector4i::load(&s0[0], &s1[0]); - GSVector4i v5 = GSVector4i::load(&s0[8], &s1[8]); - GSVector4i v6 = GSVector4i::load(&s0[16], &s1[16]); - GSVector4i v7 = GSVector4i::load(&s0[24], &s1[24]); - - if(mask == 0xffffffff) - { - // just write them out directly - - ((GSVector4i*)dst)[i * 4 + 0] = v4; - ((GSVector4i*)dst)[i * 4 + 1] = v5; - ((GSVector4i*)dst)[i * 4 + 2] = v6; - ((GSVector4i*)dst)[i * 4 + 3] = v7; - - return; - } - - v0 = GSVector8i::cast(v4).insert<1>(v5); - v1 = GSVector8i::cast(v6).insert<1>(v7); - } - } - - if(mask == 0xffffffff) - { - ((GSVector8i*)dst)[i * 2 + 0] = v0; - ((GSVector8i*)dst)[i * 2 + 1] = v1; - } - else - { - GSVector8i v2((int)mask); - - if(mask == 0xff000000 || mask == 0x00ffffff) - { - ((GSVector8i*)dst)[i * 2 + 0] = ((GSVector8i*)dst)[i * 2 + 0].blend8(v0, v2); - ((GSVector8i*)dst)[i * 2 + 1] = ((GSVector8i*)dst)[i * 2 + 1].blend8(v1, v2); - } - else - { - ((GSVector8i*)dst)[i * 2 + 0] = ((GSVector8i*)dst)[i * 2 + 0].blend(v0, v2); - ((GSVector8i*)dst)[i * 2 + 1] = ((GSVector8i*)dst)[i * 2 + 1].blend(v1, v2); - } - } - - #else - - GSVector4i v0, v1, v2, v3; - - if(alignment != 0) - { - v0 = GSVector4i::load(&s0[0]); - v1 = GSVector4i::load(&s0[16]); - v2 = GSVector4i::load(&s1[0]); - v3 = GSVector4i::load(&s1[16]); - - GSVector4i::sw64(v0, v2, v1, v3); - } - else - { - v0 = GSVector4i::load(&s0[0], &s1[0]); - v1 = GSVector4i::load(&s0[8], &s1[8]); - v2 = GSVector4i::load(&s0[16], &s1[16]); - v3 = GSVector4i::load(&s0[24], &s1[24]); - } - - if(mask == 0xffffffff) - { - ((GSVector4i*)dst)[i * 4 + 0] = v0; - ((GSVector4i*)dst)[i * 4 + 1] = v1; - ((GSVector4i*)dst)[i * 4 + 2] = v2; - ((GSVector4i*)dst)[i * 4 + 3] = v3; - } - else - { - GSVector4i v4((int)mask); - - #if _M_SSE >= 0x401 - - if(mask == 0xff000000 || mask == 0x00ffffff) - { - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, v4); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, v4); - 
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, v4); - ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, v4); - } - else - { - - #endif - - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, v4); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, v4); - ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, v4); - ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, v4); - - #if _M_SSE >= 0x401 - - } - - #endif - } - - #endif - } - - template __forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - const uint8* RESTRICT s0 = &src[srcpitch * 0]; - const uint8* RESTRICT s1 = &src[srcpitch * 1]; - - // for(int j = 0; j < 16; j++) {((uint16*)s0)[j] = columnTable16[0][j]; ((uint16*)s1)[j] = columnTable16[1][j];} - - #if _M_SSE >= 0x501 - - GSVector8i v0, v1; - - if(alignment == 32) - { - v0 = GSVector8i::load(s0); - v1 = GSVector8i::load(s1); - - GSVector8i::sw128(v0, v1); - GSVector8i::sw16(v0, v1); - } - else - { - if(alignment == 16) - { - v0 = GSVector8i::load(&s0[0], &s1[0]); - v1 = GSVector8i::load(&s0[16], &s1[16]); - } - else - { - v0 = GSVector8i::load(&s0[0], &s0[8], &s1[0], &s1[8]); - v1 = GSVector8i::load(&s0[16], &s0[24], &s1[16], &s1[24]); - } - - GSVector8i::sw16(v0, v1); - } - - v0 = v0.acbd(); - v1 = v1.acbd(); - - ((GSVector8i*)dst)[i * 2 + 0] = v0; - ((GSVector8i*)dst)[i * 2 + 1] = v1; - - #else - - GSVector4i v0, v1, v2, v3; - - if(alignment != 0) - { - v0 = GSVector4i::load(&s0[0]); - v1 = GSVector4i::load(&s0[16]); - v2 = GSVector4i::load(&s1[0]); - v3 = GSVector4i::load(&s1[16]); - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw64(v0, v1, v2, v3); - } - else - { - v0 = GSVector4i::loadl(&s0[0]).upl16(GSVector4i::loadl(&s0[16])); - v2 = GSVector4i::loadl(&s0[8]).upl16(GSVector4i::loadl(&s0[24])); - v1 = GSVector4i::loadl(&s1[0]).upl16(GSVector4i::loadl(&s1[16])); - v3 
= GSVector4i::loadl(&s1[8]).upl16(GSVector4i::loadl(&s1[24])); - - GSVector4i::sw64(v0, v1, v2, v3); - } - - ((GSVector4i*)dst)[i * 4 + 0] = v0; - ((GSVector4i*)dst)[i * 4 + 1] = v2; - ((GSVector4i*)dst)[i * 4 + 2] = v1; - ((GSVector4i*)dst)[i * 4 + 3] = v3; - - #endif - } - - template __forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - // TODO: read unaligned as WriteColumn32 does and try saving a few shuffles - - #if _M_SSE >= 0x501 - - GSVector4i v4 = GSVector4i::load(&src[srcpitch * 0]); - GSVector4i v5 = GSVector4i::load(&src[srcpitch * 1]); - GSVector4i v6 = GSVector4i::load(&src[srcpitch * 2]); - GSVector4i v7 = GSVector4i::load(&src[srcpitch * 3]); - - GSVector8i v0(v4, v5); - GSVector8i v1(v6, v7); - - if((i & 1) == 0) - { - v1 = v1.yxwz(); - } - else - { - v0 = v0.yxwz(); - } - - GSVector8i::sw8(v0, v1); - GSVector8i::sw16(v0, v1); - - v0 = v0.acbd(); - v1 = v1.acbd(); - - ((GSVector8i*)dst)[i * 2 + 0] = v0; - ((GSVector8i*)dst)[i * 2 + 1] = v1; - - #else - - GSVector4i v0 = GSVector4i::load(&src[srcpitch * 0]); - GSVector4i v1 = GSVector4i::load(&src[srcpitch * 1]); - GSVector4i v2 = GSVector4i::load(&src[srcpitch * 2]); - GSVector4i v3 = GSVector4i::load(&src[srcpitch * 3]); - - if((i & 1) == 0) - { - v2 = v2.yxwz(); - v3 = v3.yxwz(); - } - else - { - v0 = v0.yxwz(); - v1 = v1.yxwz(); - } - - GSVector4i::sw8(v0, v2, v1, v3); - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw64(v0, v1, v2, v3); - - ((GSVector4i*)dst)[i * 4 + 0] = v0; - ((GSVector4i*)dst)[i * 4 + 1] = v2; - ((GSVector4i*)dst)[i * 4 + 2] = v1; - ((GSVector4i*)dst)[i * 4 + 3] = v3; - - #endif - } - - template __forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - //printf("WriteColumn4\n"); - - // TODO: read unaligned as WriteColumn32 does and try saving a few shuffles - - // TODO: pshufb - - GSVector4i v0 = GSVector4i::load(&src[srcpitch * 0]); - GSVector4i v1 = 
GSVector4i::load(&src[srcpitch * 1]); - GSVector4i v2 = GSVector4i::load(&src[srcpitch * 2]); - GSVector4i v3 = GSVector4i::load(&src[srcpitch * 3]); - - if((i & 1) == 0) - { - v2 = v2.yxwzlh(); - v3 = v3.yxwzlh(); - } - else - { - v0 = v0.yxwzlh(); - v1 = v1.yxwzlh(); - } - - GSVector4i::sw4(v0, v2, v1, v3); - GSVector4i::sw8(v0, v1, v2, v3); - GSVector4i::sw8(v0, v2, v1, v3); - GSVector4i::sw64(v0, v2, v1, v3); - - ((GSVector4i*)dst)[i * 4 + 0] = v0; - ((GSVector4i*)dst)[i * 4 + 1] = v1; - ((GSVector4i*)dst)[i * 4 + 2] = v2; - ((GSVector4i*)dst)[i * 4 + 3] = v3; - } - - template static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - switch((y >> 1) & 3) - { - case 0: WriteColumn32<0, alignment, mask>(dst, src, srcpitch); break; - case 1: WriteColumn32<1, alignment, mask>(dst, src, srcpitch); break; - case 2: WriteColumn32<2, alignment, mask>(dst, src, srcpitch); break; - case 3: WriteColumn32<3, alignment, mask>(dst, src, srcpitch); break; - default: __assume(0); - } - } - - template static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - switch((y >> 1) & 3) - { - case 0: WriteColumn16<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn16<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn16<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn16<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); - } - } - - template static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - switch((y >> 2) & 3) - { - case 0: WriteColumn8<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn8<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn8<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn8<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); - } - } - - template static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int 
srcpitch) - { - switch((y >> 2) & 3) - { - case 0: WriteColumn4<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn4<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn4<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn4<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); - } - } - - template static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - WriteColumn32<0, alignment, mask>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn32<1, alignment, mask>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn32<2, alignment, mask>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn32<3, alignment, mask>(dst, src, srcpitch); - } - - template static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - WriteColumn16<0, alignment>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn16<1, alignment>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn16<2, alignment>(dst, src, srcpitch); - src += srcpitch * 2; - WriteColumn16<3, alignment>(dst, src, srcpitch); - } - - template static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - WriteColumn8<0, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn8<1, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn8<2, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn8<3, alignment>(dst, src, srcpitch); - } - - template static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) - { - WriteColumn4<0, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn4<1, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn4<2, alignment>(dst, src, srcpitch); - src += srcpitch * 4; - WriteColumn4<3, alignment>(dst, src, srcpitch); - } - - template __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - #if 
_M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0 = s[i * 2 + 0]; - GSVector8i v1 = s[i * 2 + 1]; - - GSVector8i::sw128(v0, v1); - GSVector8i::sw64(v0, v1); - - GSVector8i::store(&dst[dstpitch * 0], v0); - GSVector8i::store(&dst[dstpitch * 1], v1); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i * 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - GSVector4i::store(&d0[0], v0); - GSVector4i::store(&d0[1], v1); - GSVector4i::store(&d1[0], v2); - GSVector4i::store(&d1[1], v3); - - #endif - } - - template __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - #if _M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask); - GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask); - - GSVector8i::sw128(v0, v1); - GSVector8i::sw32(v0, v1); - - v0 = v0.acbd(); - v1 = v1.acbd(); - - GSVector8i::store(&dst[dstpitch * 0], v0); - GSVector8i::store(&dst[dstpitch * 1], v1); - - #elif _M_SSE >= 0x301 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0].shuffle8(m_r16mask); - GSVector4i v1 = s[i * 4 + 1].shuffle8(m_r16mask); - GSVector4i v2 = s[i * 4 + 2].shuffle8(m_r16mask); - GSVector4i v3 = s[i * 4 + 3].shuffle8(m_r16mask); - - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw64(v0, v1, v2, v3); - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - GSVector4i::store(&d0[0], v0); - GSVector4i::store(&d0[1], v2); - GSVector4i::store(&d1[0], v1); - GSVector4i::store(&d1[1], v3); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i 
* 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - //for(int16 i = 0; i < 8; i++) {v0.i16[i] = i; v1.i16[i] = i + 8; v2.i16[i] = i + 16; v3.i16[i] = i + 24;} - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw16(v0, v2, v1, v3); - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - GSVector4i::store(&d0[0], v0); - GSVector4i::store(&d0[1], v1); - GSVector4i::store(&d1[0], v2); - GSVector4i::store(&d1[1], v3); - - #endif - } - - template __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - //for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j; - - #if 0//_M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0 = s[i * 2 + 0]; - GSVector8i v1 = s[i * 2 + 1]; - - GSVector8i::sw8(v0, v1); - GSVector8i::sw16(v0, v1); - GSVector8i::sw8(v0, v1); - GSVector8i::sw128(v0, v1); - GSVector8i::sw16(v0, v1); - - v0 = v0.acbd(); - v1 = v1.acbd(); - v1 = v1.yxwz(); - - GSVector8i::storel(&dst[dstpitch * 0], v0); - GSVector8i::storeh(&dst[dstpitch * 1], v0); - GSVector8i::storel(&dst[dstpitch * 2], v1); - GSVector8i::storeh(&dst[dstpitch * 3], v1); - - // TODO: not sure if this is worth it, not in this form, there should be a shorter path - - #elif _M_SSE >= 0x301 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - if((i & 1) == 0) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - } - else - { - v2 = s[i * 4 + 0]; - v3 = s[i * 4 + 1]; - v0 = s[i * 4 + 2]; - v1 = s[i * 4 + 3]; - } - - v0 = v0.shuffle8(m_r8mask); - v1 = v1.shuffle8(m_r8mask); - v2 = v2.shuffle8(m_r8mask); - v3 = v3.shuffle8(m_r8mask); - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v3, v2); - - GSVector4i::store(&dst[dstpitch * 0], v0); - GSVector4i::store(&dst[dstpitch * 1], v3); - GSVector4i::store(&dst[dstpitch * 2], v1); - GSVector4i::store(&dst[dstpitch 
* 3], v2); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i * 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - GSVector4i::sw8(v0, v1, v2, v3); - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw8(v0, v2, v1, v3); - GSVector4i::sw64(v0, v1, v2, v3); - - if((i & 1) == 0) - { - v2 = v2.yxwz(); - v3 = v3.yxwz(); - } - else - { - v0 = v0.yxwz(); - v1 = v1.yxwz(); - } - - GSVector4i::store(&dst[dstpitch * 0], v0); - GSVector4i::store(&dst[dstpitch * 1], v1); - GSVector4i::store(&dst[dstpitch * 2], v2); - GSVector4i::store(&dst[dstpitch * 3], v3); - - #endif - } - - template __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - //printf("ReadColumn4\n"); - - #if _M_SSE >= 0x301 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0].xzyw(); - GSVector4i v1 = s[i * 4 + 1].xzyw(); - GSVector4i v2 = s[i * 4 + 2].xzyw(); - GSVector4i v3 = s[i * 4 + 3].xzyw(); - - GSVector4i::sw64(v0, v1, v2, v3); - GSVector4i::sw4(v0, v2, v1, v3); - GSVector4i::sw8(v0, v1, v2, v3); - - v0 = v0.shuffle8(m_r4mask); - v1 = v1.shuffle8(m_r4mask); - v2 = v2.shuffle8(m_r4mask); - v3 = v3.shuffle8(m_r4mask); - - if((i & 1) == 0) - { - GSVector4i::sw16rh(v0, v1, v2, v3); - } - else - { - GSVector4i::sw16rl(v0, v1, v2, v3); - } - - GSVector4i::store(&dst[dstpitch * 0], v0); - GSVector4i::store(&dst[dstpitch * 1], v1); - GSVector4i::store(&dst[dstpitch * 2], v2); - GSVector4i::store(&dst[dstpitch * 3], v3); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i * 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw4(v0, v2, v1, v3); - GSVector4i::sw8(v0, v1, v2, v3); - GSVector4i::sw16(v0, v2, v1, v3); - - v0 = v0.xzyw(); - v1 = v1.xzyw(); - v2 = v2.xzyw(); - v3 
= v3.xzyw(); - - GSVector4i::sw64(v0, v1, v2, v3); - - if((i & 1) == 0) - { - v2 = v2.yxwzlh(); - v3 = v3.yxwzlh(); - } - else - { - v0 = v0.yxwzlh(); - v1 = v1.yxwzlh(); - } - - GSVector4i::store(&dst[dstpitch * 0], v0); - GSVector4i::store(&dst[dstpitch * 1], v1); - GSVector4i::store(&dst[dstpitch * 2], v2); - GSVector4i::store(&dst[dstpitch * 3], v3); - - #endif - } - - static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - switch((y >> 1) & 3) - { - case 0: ReadColumn32<0>(src, dst, dstpitch); break; - case 1: ReadColumn32<1>(src, dst, dstpitch); break; - case 2: ReadColumn32<2>(src, dst, dstpitch); break; - case 3: ReadColumn32<3>(src, dst, dstpitch); break; - default: __assume(0); - } - } - - static void ReadColumn16(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - switch((y >> 1) & 3) - { - case 0: ReadColumn16<0>(src, dst, dstpitch); break; - case 1: ReadColumn16<1>(src, dst, dstpitch); break; - case 2: ReadColumn16<2>(src, dst, dstpitch); break; - case 3: ReadColumn16<3>(src, dst, dstpitch); break; - default: __assume(0); - } - } - - static void ReadColumn8(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - switch((y >> 2) & 3) - { - case 0: ReadColumn8<0>(src, dst, dstpitch); break; - case 1: ReadColumn8<1>(src, dst, dstpitch); break; - case 2: ReadColumn8<2>(src, dst, dstpitch); break; - case 3: ReadColumn8<3>(src, dst, dstpitch); break; - default: __assume(0); - } - } - - static void ReadColumn4(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - switch((y >> 2) & 3) - { - case 0: ReadColumn4<0>(src, dst, dstpitch); break; - case 1: ReadColumn4<1>(src, dst, dstpitch); break; - case 2: ReadColumn4<2>(src, dst, dstpitch); break; - case 3: ReadColumn4<3>(src, dst, dstpitch); break; - default: __assume(0); - } - } - - static void ReadBlock32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - ReadColumn32<0>(src, dst, 
dstpitch); - dst += dstpitch * 2; - ReadColumn32<1>(src, dst, dstpitch); - dst += dstpitch * 2; - ReadColumn32<2>(src, dst, dstpitch); - dst += dstpitch * 2; - ReadColumn32<3>(src, dst, dstpitch); - } - - static void ReadBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - ReadColumn16<0>(src, dst, dstpitch); - dst += dstpitch * 2; - ReadColumn16<1>(src, dst, dstpitch); - dst += dstpitch * 2; - ReadColumn16<2>(src, dst, dstpitch); - dst += dstpitch * 2; - ReadColumn16<3>(src, dst, dstpitch); - } - - static void ReadBlock8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - ReadColumn8<0>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn8<1>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn8<2>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn8<3>(src, dst, dstpitch); - } - - static void ReadBlock4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - ReadColumn4<0>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn4<1>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn4<2>(src, dst, dstpitch); - dst += dstpitch * 4; - ReadColumn4<3>(src, dst, dstpitch); - } - - __forceinline static void ReadBlock4P(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - //printf("ReadBlock4P\n"); - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - GSVector4i mask(0x0f0f0f0f); - - for(int i = 0; i < 2; i++) - { - // col 0, 2 - - v0 = s[i * 8 + 0]; - v1 = s[i * 8 + 1]; - v2 = s[i * 8 + 2]; - v3 = s[i * 8 + 3]; - - GSVector4i::sw8(v0, v1, v2, v3); - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw8(v0, v2, v1, v3); - - GSVector4i::store(&dst[dstpitch * 0 + 0], (v0 & mask)); - GSVector4i::store(&dst[dstpitch * 0 + 16], (v1 & mask)); - GSVector4i::store(&dst[dstpitch * 1 + 0], (v2 & mask)); - GSVector4i::store(&dst[dstpitch * 1 + 16], (v3 & mask)); - - dst += dstpitch * 2; - - GSVector4i::store(&dst[dstpitch * 0 + 0], (v0.andnot(mask)).yxwz() >> 4); - 
GSVector4i::store(&dst[dstpitch * 0 + 16], (v1.andnot(mask)).yxwz() >> 4); - GSVector4i::store(&dst[dstpitch * 1 + 0], (v2.andnot(mask)).yxwz() >> 4); - GSVector4i::store(&dst[dstpitch * 1 + 16], (v3.andnot(mask)).yxwz() >> 4); - - dst += dstpitch * 2; - - // col 1, 3 - - v0 = s[i * 8 + 4]; - v1 = s[i * 8 + 5]; - v2 = s[i * 8 + 6]; - v3 = s[i * 8 + 7]; - - GSVector4i::sw8(v0, v1, v2, v3); - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw8(v0, v2, v1, v3); - - GSVector4i::store(&dst[dstpitch * 0 + 0], (v0 & mask).yxwz()); - GSVector4i::store(&dst[dstpitch * 0 + 16], (v1 & mask).yxwz()); - GSVector4i::store(&dst[dstpitch * 1 + 0], (v2 & mask).yxwz()); - GSVector4i::store(&dst[dstpitch * 1 + 16], (v3 & mask).yxwz()); - - dst += dstpitch * 2; - - GSVector4i::store(&dst[dstpitch * 0 + 0], (v0.andnot(mask)) >> 4); - GSVector4i::store(&dst[dstpitch * 0 + 16], (v1.andnot(mask)) >> 4); - GSVector4i::store(&dst[dstpitch * 1 + 0], (v2.andnot(mask)) >> 4); - GSVector4i::store(&dst[dstpitch * 1 + 16], (v3.andnot(mask)) >> 4); - - dst += dstpitch * 2; - } - } - - __forceinline static void ReadBlock8HP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - #if _M_SSE >= 0x501 - - uint8* RESTRICT d0 = &dst[dstpitch * 0]; - uint8* RESTRICT d1 = &dst[dstpitch * 4]; - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0, v1, v2, v3; - GSVector4i v4, v5; - - v0 = s[0].acbd(); - v1 = s[1].acbd(); - v2 = s[2].acbd(); - v3 = s[3].acbd(); - - v0 = (v0 >> 24).ps32(v1 >> 24).pu16((v2 >> 24).ps32(v3 >> 24)); - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - GSVector4i::storel(&d0[dstpitch * 0], v4); - GSVector4i::storel(&d0[dstpitch * 1], v5); - GSVector4i::storeh(&d0[dstpitch * 2], v4); - GSVector4i::storeh(&d0[dstpitch * 3], v5); - - v0 = s[4].acbd(); - v1 = s[5].acbd(); - v2 = s[6].acbd(); - v3 = s[7].acbd(); - - v0 = (v0 >> 24).ps32(v1 >> 24).pu16((v2 >> 24).ps32(v3 >> 24)); - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - 
GSVector4i::storel(&d1[dstpitch * 0], v4); - GSVector4i::storel(&d1[dstpitch * 1], v5); - GSVector4i::storeh(&d1[dstpitch * 2], v4); - GSVector4i::storeh(&d1[dstpitch * 3], v5); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - v0 = ((v0 >> 24).ps32(v1 >> 24)).pu16((v2 >> 24).ps32(v3 >> 24)); - - GSVector4i::storel(dst, v0); - - dst += dstpitch; - - GSVector4i::storeh(dst, v0); - - dst += dstpitch; - } - - #endif - } - - __forceinline static void ReadBlock4HLP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - #if _M_SSE >= 0x501 - - uint8* RESTRICT d0 = &dst[dstpitch * 0]; - uint8* RESTRICT d1 = &dst[dstpitch * 4]; - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0, v1, v2, v3; - GSVector4i v4, v5; - GSVector8i mask(0x0f0f0f0f); - - v0 = s[0].acbd(); - v1 = s[1].acbd(); - v2 = s[2].acbd(); - v3 = s[3].acbd(); - - v0 = (v0 >> 24).ps32(v1 >> 24).pu16((v2 >> 24).ps32(v3 >> 24)) & mask; - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - GSVector4i::storel(&d0[dstpitch * 0], v4); - GSVector4i::storel(&d0[dstpitch * 1], v5); - GSVector4i::storeh(&d0[dstpitch * 2], v4); - GSVector4i::storeh(&d0[dstpitch * 3], v5); - - v0 = s[4].acbd(); - v1 = s[5].acbd(); - v2 = s[6].acbd(); - v3 = s[7].acbd(); - - v0 = (v0 >> 24).ps32(v1 >> 24).pu16((v2 >> 24).ps32(v3 >> 24)) & mask; - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - GSVector4i::storel(&d1[dstpitch * 0], v4); - GSVector4i::storel(&d1[dstpitch * 1], v5); - GSVector4i::storeh(&d1[dstpitch * 2], v4); - GSVector4i::storeh(&d1[dstpitch * 3], v5); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - GSVector4i mask(0x0f0f0f0f); - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - 
- GSVector4i::sw64(v0, v1, v2, v3); - - v0 = ((v0 >> 24).ps32(v1 >> 24)).pu16((v2 >> 24).ps32(v3 >> 24)) & mask; - - GSVector4i::storel(dst, v0); - - dst += dstpitch; - - GSVector4i::storeh(dst, v0); - - dst += dstpitch; - } - - #endif - } - - __forceinline static void ReadBlock4HHP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) - { - #if _M_SSE >= 0x501 - - uint8* RESTRICT d0 = &dst[dstpitch * 0]; - uint8* RESTRICT d1 = &dst[dstpitch * 4]; - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i v0, v1, v2, v3; - GSVector4i v4, v5; - - v0 = s[0].acbd(); - v1 = s[1].acbd(); - v2 = s[2].acbd(); - v3 = s[3].acbd(); - - v0 = (v0 >> 28).ps32(v1 >> 28).pu16((v2 >> 28).ps32(v3 >> 28)); - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - GSVector4i::storel(&d0[dstpitch * 0], v4); - GSVector4i::storel(&d0[dstpitch * 1], v5); - GSVector4i::storeh(&d0[dstpitch * 2], v4); - GSVector4i::storeh(&d0[dstpitch * 3], v5); - - v0 = s[4].acbd(); - v1 = s[5].acbd(); - v2 = s[6].acbd(); - v3 = s[7].acbd(); - - v0 = (v0 >> 28).ps32(v1 >> 28).pu16((v2 >> 28).ps32(v3 >> 28)); - - v4 = v0.extract<0>(); - v5 = v0.extract<1>(); - - GSVector4i::storel(&d1[dstpitch * 0], v4); - GSVector4i::storel(&d1[dstpitch * 1], v5); - GSVector4i::storeh(&d1[dstpitch * 2], v4); - GSVector4i::storeh(&d1[dstpitch * 3], v5); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - v0 = ((v0 >> 28).ps32(v1 >> 28)).pu16((v2 >> 28).ps32(v3 >> 28)); - - GSVector4i::storel(dst, v0); - - dst += dstpitch; - - GSVector4i::storeh(dst, v0); - - dst += dstpitch; - } - - #endif - } - - template __forceinline static V Expand24to32(const V& c, const V& TA0) - { - return c | (AEM ? 
TA0.andnot(c == V::zero()) : TA0); // TA0 & (c != GSVector4i::zero()) - } - - template __forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1) - { - return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == V::zero()) : TA0.blend(TA1, c.sra16(15))); - } - - template static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) - { - #if _M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i TA0(TEXA.TA0 << 24); - GSVector8i mask = GSVector8i::x00ffffff(); - - for(int i = 0; i < 4; i++, dst += dstpitch * 2) - { - GSVector8i v0 = s[i * 2 + 0] & mask; - GSVector8i v1 = s[i * 2 + 1] & mask; - - GSVector8i* d0 = (GSVector8i*)&dst[dstpitch * 0]; - GSVector8i* d1 = (GSVector8i*)&dst[dstpitch * 1]; - - d0[0] = Expand24to32(v0, TA0); - d1[0] = Expand24to32(v1, TA0); - } - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i mask = GSVector4i::x00ffffff(); - - for(int i = 0; i < 4; i++, dst += dstpitch * 2) - { - GSVector4i v0 = s[i * 4 + 0] & mask; - GSVector4i v1 = s[i * 4 + 1] & mask; - GSVector4i v2 = s[i * 4 + 2] & mask; - GSVector4i v3 = s[i * 4 + 3] & mask; - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - d0[0] = Expand24to32(v0, TA0); - d0[1] = Expand24to32(v1, TA0); - d1[0] = Expand24to32(v2, TA0); - d1[1] = Expand24to32(v3, TA0); - } - - #endif - } - - template static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs - { - #if _M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i TA0(TEXA.TA0 << 24); - GSVector8i TA1(TEXA.TA1 << 24); - - for(int i = 0; i < 8; i++, dst += dstpitch) - { - GSVector8i v = s[i].acbd(); - - ((GSVector8i*)dst)[0] = Expand16to32(v.upl16(v), TA0, 
TA1); - ((GSVector8i*)dst)[1] = Expand16to32(v.uph16(v), TA0, TA1); - } - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i TA1(TEXA.TA1 << 24); - - for(int i = 0; i < 8; i++, dst += dstpitch) - { - GSVector4i v0 = s[i * 2 + 0]; - - ((GSVector4i*)dst)[0] = Expand16to32(v0.upl16(v0), TA0, TA1); - ((GSVector4i*)dst)[1] = Expand16to32(v0.uph16(v0), TA0, TA1); - - GSVector4i v1 = s[i * 2 + 1]; - - ((GSVector4i*)dst)[2] = Expand16to32(v1.upl16(v1), TA0, TA1); - ((GSVector4i*)dst)[3] = Expand16to32(v1.uph16(v1), TA0, TA1); - } - - #endif - } - - __forceinline static void ExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 16; j++, dst += dstpitch) - { - ((const GSVector4i*)src)[j].gather32_8(pal, (GSVector4i*)dst); - } - } - - __forceinline static void ExpandBlock8_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 16; j++, dst += dstpitch) - { - ((const GSVector4i*)src)[j].gather16_8(pal, (GSVector4i*)dst); - } - } - - __forceinline static void ExpandBlock4_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal) - { - for(int j = 0; j < 16; j++, dst += dstpitch) - { - ((const GSVector4i*)src)[j].gather64_8(pal, (GSVector4i*)dst); - } - } - - __forceinline static void ExpandBlock4_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal) - { - for(int j = 0; j < 16; j++, dst += dstpitch) - { - ((const GSVector4i*)src)[j].gather32_8(pal, (GSVector4i*)dst); - } - } - - __forceinline static void ExpandBlock8H_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - const GSVector4i* s = (const GSVector4i*)src; - - ((GSVector4i*)dst)[0] = (s[j * 2 + 0] >> 24).gather32_32<>(pal); - ((GSVector4i*)dst)[1] = (s[j * 2 + 1] 
>> 24).gather32_32<>(pal); - } - } - - __forceinline static void ExpandBlock8H_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = (s[j * 2 + 0] >> 24).gather32_32<>(pal); - GSVector4i v1 = (s[j * 2 + 1] >> 24).gather32_32<>(pal); - - ((GSVector4i*)dst)[0] = v0.pu32(v1); - - #else - - for(int i = 0; i < 8; i++) - { - ((uint16*)dst)[i] = (uint16)pal[src[j * 8 + i] >> 24]; - } - - #endif - } - } - - __forceinline static void ExpandBlock4HL_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - const GSVector4i* s = (const GSVector4i*)src; - - ((GSVector4i*)dst)[0] = ((s[j * 2 + 0] >> 24) & 0xf).gather32_32<>(pal); - ((GSVector4i*)dst)[1] = ((s[j * 2 + 1] >> 24) & 0xf).gather32_32<>(pal); - } - } - - __forceinline static void ExpandBlock4HL_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = ((s[j * 2 + 0] >> 24) & 0xf).gather32_32<>(pal); - GSVector4i v1 = ((s[j * 2 + 1] >> 24) & 0xf).gather32_32<>(pal); - - ((GSVector4i*)dst)[0] = v0.pu32(v1); - - #else - - for(int i = 0; i < 8; i++) - { - ((uint16*)dst)[i] = (uint16)pal[(src[j * 8 + i] >> 24) & 0xf]; - } - - #endif - } - } - - __forceinline static void ExpandBlock4HH_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - const GSVector4i* s = (const GSVector4i*)src; - - ((GSVector4i*)dst)[0] = (s[j * 2 + 0] >> 28).gather32_32<>(pal); - ((GSVector4i*)dst)[1] = (s[j * 2 + 1] >> 28).gather32_32<>(pal); - } - } - - __forceinline static void ExpandBlock4HH_16(uint32* RESTRICT src, uint8* 
RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - for(int j = 0; j < 8; j++, dst += dstpitch) - { - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0 = (s[j * 2 + 0] >> 28).gather32_32<>(pal); - GSVector4i v1 = (s[j * 2 + 1] >> 28).gather32_32<>(pal); - - ((GSVector4i*)dst)[0] = v0.pu32(v1); - - #else - - for(int i = 0; i < 8; i++) - { - ((uint16*)dst)[i] = (uint16)pal[src[j * 8 + i] >> 28]; - } - - #endif - } - } - - __forceinline static void UnpackAndWriteBlock24(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) - { - #if _M_SSE >= 0x501 - - const uint8* RESTRICT s0 = &src[srcpitch * 0]; - const uint8* RESTRICT s1 = &src[srcpitch * 1]; - const uint8* RESTRICT s2 = &src[srcpitch * 2]; - const uint8* RESTRICT s3 = &src[srcpitch * 3]; - - GSVector8i v0, v1, v2, v3, v4, v5, v6; - GSVector8i mask = GSVector8i::x00ffffff(); - - v4 = GSVector8i::load(s0, s0 + 8, s2, s2 + 8); - v5 = GSVector8i::load(s0 + 16, s1, s2 + 16, s3); - v6 = GSVector8i::load(s1 + 8, s1 + 16, s3 + 8, s3 + 16); - - v0 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v4.srl<12>(v5); - v1 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v5.srl<8>(v6); - v2 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v6.srl<4>(); - v3 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - - GSVector8i::sw64(v0, v2, v1, v3); - - ((GSVector8i*)dst)[0] = ((GSVector8i*)dst)[0].blend8(v0, mask); - ((GSVector8i*)dst)[1] = ((GSVector8i*)dst)[1].blend8(v2, mask); - ((GSVector8i*)dst)[2] = ((GSVector8i*)dst)[2].blend8(v1, mask); - ((GSVector8i*)dst)[3] = ((GSVector8i*)dst)[3].blend8(v3, mask); - - src += srcpitch * 4; - - s0 = &src[srcpitch * 0]; - s1 = &src[srcpitch * 1]; - s2 = &src[srcpitch * 2]; - s3 = &src[srcpitch * 3]; - - v4 = GSVector8i::load(s0, s0 + 8, s2, s2 + 8); - v5 = GSVector8i::load(s0 + 16, s1, s2 + 16, s3); - v6 = GSVector8i::load(s1 + 8, s1 + 
16, s3 + 8, s3 + 16); - - v0 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v4.srl<12>(v5); - v1 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v5.srl<8>(v6); - v2 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - v4 = v6.srl<4>(); - v3 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); - - GSVector8i::sw64(v0, v2, v1, v3); - - ((GSVector8i*)dst)[4] = ((GSVector8i*)dst)[4].blend8(v0, mask); - ((GSVector8i*)dst)[5] = ((GSVector8i*)dst)[5].blend8(v2, mask); - ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend8(v1, mask); - ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend8(v3, mask); - - #else - - GSVector4i v0, v1, v2, v3, v4, v5, v6; - GSVector4i mask = GSVector4i::x00ffffff(); - - for(int i = 0; i < 4; i++, src += srcpitch * 2) - { - v4 = GSVector4i::load(src); - v5 = GSVector4i::load(src + 16, src + srcpitch); - v6 = GSVector4i::load(src + srcpitch + 8); - - v0 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())); - v4 = v4.srl<12>(v5); - v1 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())); - v4 = v5.srl<8>(v6); - v2 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())); - v4 = v6.srl<4>(); - v3 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())); - - GSVector4i::sw64(v0, v2, v1, v3); - - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask); - ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask); - ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); - } - - #endif - } - - __forceinline static void UnpackAndWriteBlock8H(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) - { - GSVector4i v4, v5, v6, v7; - - #if _M_SSE >= 0x501 - - GSVector8i v0, v1, v2, v3; - GSVector8i mask = GSVector8i::xff000000(); - - v4 = GSVector4i::loadl(&src[srcpitch * 0]); - v5 
= GSVector4i::loadl(&src[srcpitch * 1]); - v6 = GSVector4i::loadl(&src[srcpitch * 2]); - v7 = GSVector4i::loadl(&src[srcpitch * 3]); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v6.upl16(v7)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[0] = ((GSVector8i*)dst)[0].blend8(v0, mask); - ((GSVector8i*)dst)[1] = ((GSVector8i*)dst)[1].blend8(v1, mask); - ((GSVector8i*)dst)[2] = ((GSVector8i*)dst)[2].blend8(v2, mask); - ((GSVector8i*)dst)[3] = ((GSVector8i*)dst)[3].blend8(v3, mask); - - src += srcpitch * 4; - - v4 = GSVector4i::loadl(&src[srcpitch * 0]); - v5 = GSVector4i::loadl(&src[srcpitch * 1]); - v6 = GSVector4i::loadl(&src[srcpitch * 2]); - v7 = GSVector4i::loadl(&src[srcpitch * 3]); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v6.upl16(v7)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[4] = ((GSVector8i*)dst)[4].blend8(v0, mask); - ((GSVector8i*)dst)[5] = ((GSVector8i*)dst)[5].blend8(v1, mask); - ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend8(v2, mask); - ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend8(v3, mask); - - #elif _M_SSE >= 0x301 - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = GSVector4i::xff000000(); - GSVector4i mask0 = m_uw8hmask0; - GSVector4i mask1 = m_uw8hmask1; - GSVector4i mask2 = m_uw8hmask2; - GSVector4i mask3 = m_uw8hmask3; - - for(int i = 0; i < 4; i++, src += srcpitch * 2) - { - v4 = GSVector4i::load(src, src + srcpitch); - - v0 = v4.shuffle8(mask0); - v1 = v4.shuffle8(mask1); - v2 = v4.shuffle8(mask2); - v3 = v4.shuffle8(mask3); - - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask); - ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask); - 
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); - } - - #else - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = GSVector4i::xff000000(); - - for(int i = 0; i < 4; i++, src += srcpitch * 2) - { - v4 = GSVector4i::loadl(&src[srcpitch * 0]); - v5 = GSVector4i::loadl(&src[srcpitch * 1]); - - v6 = v4.upl16(v5); - - v4 = v6.upl8(v6); - v5 = v6.uph8(v6); - - v0 = v4.upl16(v4); - v1 = v4.uph16(v4); - v2 = v5.upl16(v5); - v3 = v5.uph16(v5); - - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask); - ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask); - ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); - } - - #endif - } - - __forceinline static void UnpackAndWriteBlock4HL(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) - { - //printf("4HL\n"); - - if(0) - { - uint8* s = (uint8*)src; - for(int j = 0; j < 8; j++, s += srcpitch) - for(int i = 0; i < 4; i++) s[i] = (columnTable32[j][i*2] & 0x0f) | (columnTable32[j][i*2+1] << 4); - } - - GSVector4i v4, v5, v6, v7; - - #if _M_SSE >= 0x501 - - GSVector8i v0, v1, v2, v3; - GSVector8i mask(0x0f000000); - - v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = v6.upl8(v6 >> 4); - v5 = v6.uph8(v6 >> 4); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v4.uph16(v5)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[0] = ((GSVector8i*)dst)[0].blend(v0, mask); - ((GSVector8i*)dst)[1] = ((GSVector8i*)dst)[1].blend(v1, mask); - ((GSVector8i*)dst)[2] = ((GSVector8i*)dst)[2].blend(v2, mask); - ((GSVector8i*)dst)[3] = ((GSVector8i*)dst)[3].blend(v3, mask); - - src += srcpitch * 4; - - v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], 
*(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = v6.upl8(v6 >> 4); - v5 = v6.uph8(v6 >> 4); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v4.uph16(v5)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[4] = ((GSVector8i*)dst)[4].blend(v0, mask); - ((GSVector8i*)dst)[5] = ((GSVector8i*)dst)[5].blend(v1, mask); - ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend(v2, mask); - ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend(v3, mask); - - #elif _M_SSE >= 0x301 - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = GSVector4i(0x0f000000); - GSVector4i mask0 = m_uw8hmask0; - GSVector4i mask1 = m_uw8hmask1; - GSVector4i mask2 = m_uw8hmask2; - GSVector4i mask3 = m_uw8hmask3; - - for(int i = 0; i < 2; i++, src += srcpitch * 4) - { - GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]); - - v4 = v.upl8(v >> 4); - v5 = v.uph8(v >> 4); - - v0 = v4.shuffle8(mask0); - v1 = v4.shuffle8(mask1); - v2 = v4.shuffle8(mask2); - v3 = v4.shuffle8(mask3); - - ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask); - - v0 = v5.shuffle8(mask0); - v1 = v5.shuffle8(mask1); - v2 = v5.shuffle8(mask2); - v3 = v5.shuffle8(mask3); - - ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 5] = ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); - } - - #else - - GSVector4i v0, v1, v2, 
v3; - GSVector4i mask = GSVector4i(0x0f000000); - - for(int i = 0; i < 2; i++, src += srcpitch * 4) - { - GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = v.upl8(v >> 4); - v5 = v.uph8(v >> 4); - - v6 = v4.upl16(v5); - v7 = v4.uph16(v5); - - v4 = v6.upl8(v6); - v5 = v6.uph8(v6); - v6 = v7.upl8(v7); - v7 = v7.uph8(v7); - - v0 = v4.upl16(v4); - v1 = v4.uph16(v4); - v2 = v5.upl16(v5); - v3 = v5.uph16(v5); - - ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask); - - v0 = v6.upl16(v6); - v1 = v6.uph16(v6); - v2 = v7.upl16(v7); - v3 = v7.uph16(v7); - - ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 5] = ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); - } - - #endif - } - - __forceinline static void UnpackAndWriteBlock4HH(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) - { - GSVector4i v4, v5, v6, v7; - - #if _M_SSE >= 0x501 - - GSVector8i v0, v1, v2, v3; - GSVector8i mask = GSVector8i::xf0000000(); - - v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = (v6 << 4).upl8(v6); - v5 = (v6 << 4).uph8(v6); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v4.uph16(v5)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[0] = ((GSVector8i*)dst)[0].blend(v0, mask); - ((GSVector8i*)dst)[1] = 
((GSVector8i*)dst)[1].blend(v1, mask); - ((GSVector8i*)dst)[2] = ((GSVector8i*)dst)[2].blend(v2, mask); - ((GSVector8i*)dst)[3] = ((GSVector8i*)dst)[3].blend(v3, mask); - - src += srcpitch * 4; - - v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = (v6 << 4).upl8(v6); - v5 = (v6 << 4).uph8(v6); - - v2 = GSVector8i::cast(v4.upl16(v5)); - v3 = GSVector8i::cast(v4.uph16(v5)); - - v0 = v2.u8to32c() << 24; - v1 = v2.bbbb().u8to32c() << 24; - v2 = v3.u8to32c() << 24; - v3 = v3.bbbb().u8to32c() << 24; - - ((GSVector8i*)dst)[4] = ((GSVector8i*)dst)[4].blend(v0, mask); - ((GSVector8i*)dst)[5] = ((GSVector8i*)dst)[5].blend(v1, mask); - ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend(v2, mask); - ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend(v3, mask); - - #elif _M_SSE >= 0x301 - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = GSVector4i::xf0000000(); - GSVector4i mask0 = m_uw8hmask0; - GSVector4i mask1 = m_uw8hmask1; - GSVector4i mask2 = m_uw8hmask2; - GSVector4i mask3 = m_uw8hmask3; - - for(int i = 0; i < 2; i++, src += srcpitch * 4) - { - GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]); - - v4 = (v << 4).upl8(v); - v5 = (v << 4).uph8(v); - - v0 = v4.shuffle8(mask0); - v1 = v4.shuffle8(mask1); - v2 = v4.shuffle8(mask2); - v3 = v4.shuffle8(mask3); - - ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask); - - v0 = v5.shuffle8(mask0); - v1 = v5.shuffle8(mask1); - v2 = v5.shuffle8(mask2); - v3 = v5.shuffle8(mask3); - - ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 5] 
= ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); - } - - #else - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = GSVector4i::xf0000000(); - - for(int i = 0; i < 2; i++, src += srcpitch * 4) - { - GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]); - - v4 = (v << 4).upl8(v); - v5 = (v << 4).uph8(v); - - v6 = v4.upl16(v5); - v7 = v4.uph16(v5); - - v4 = v6.upl8(v6); - v5 = v6.uph8(v6); - v6 = v7.upl8(v7); - v7 = v7.uph8(v7); - - v0 = v4.upl16(v4); - v1 = v4.uph16(v4); - v2 = v5.upl16(v5); - v3 = v5.uph16(v5); - - ((GSVector4i*)dst)[i * 8 + 0] = ((GSVector4i*)dst)[i * 8 + 0].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 1] = ((GSVector4i*)dst)[i * 8 + 1].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 2] = ((GSVector4i*)dst)[i * 8 + 2].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 3] = ((GSVector4i*)dst)[i * 8 + 3].blend(v3, mask); - - v0 = v6.upl16(v6); - v1 = v6.uph16(v6); - v2 = v7.upl16(v7); - v3 = v7.uph16(v7); - - ((GSVector4i*)dst)[i * 8 + 4] = ((GSVector4i*)dst)[i * 8 + 4].blend(v0, mask); - ((GSVector4i*)dst)[i * 8 + 5] = ((GSVector4i*)dst)[i * 8 + 5].blend(v1, mask); - ((GSVector4i*)dst)[i * 8 + 6] = ((GSVector4i*)dst)[i * 8 + 6].blend(v2, mask); - ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); - } - - #endif - } - - template __forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) - { - #if _M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i TA0(TEXA.TA0 << 24); - GSVector8i mask = GSVector8i::x00ffffff(); - - GSVector8i v0, v1, v2, v3; - - v0 = s[0] & mask; - v1 = s[1] & mask; - v2 = s[2] & mask; - v3 = s[3] & mask; - - GSVector8i::sw128(v0, v1); - GSVector8i::sw64(v0, v1); - 
GSVector8i::sw128(v2, v3); - GSVector8i::sw64(v2, v3); - - *(GSVector8i*)&dst[dstpitch * 0] = Expand24to32(v0, TA0); - *(GSVector8i*)&dst[dstpitch * 1] = Expand24to32(v1, TA0); - *(GSVector8i*)&dst[dstpitch * 2] = Expand24to32(v2, TA0); - *(GSVector8i*)&dst[dstpitch * 3] = Expand24to32(v3, TA0); - - v0 = s[4] & mask; - v1 = s[5] & mask; - v2 = s[6] & mask; - v3 = s[7] & mask; - - GSVector8i::sw128(v0, v1); - GSVector8i::sw64(v0, v1); - GSVector8i::sw128(v2, v3); - GSVector8i::sw64(v2, v3); - - dst += dstpitch * 4; - - *(GSVector8i*)&dst[dstpitch * 0] = Expand24to32(v0, TA0); - *(GSVector8i*)&dst[dstpitch * 1] = Expand24to32(v1, TA0); - *(GSVector8i*)&dst[dstpitch * 2] = Expand24to32(v2, TA0); - *(GSVector8i*)&dst[dstpitch * 3] = Expand24to32(v3, TA0); - - #else - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i mask = GSVector4i::x00ffffff(); - - for(int i = 0; i < 4; i++, dst += dstpitch * 2) - { - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i * 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - v0 &= mask; - v1 &= mask; - v2 &= mask; - v3 &= mask; - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - d0[0] = Expand24to32(v0, TA0); - d0[1] = Expand24to32(v1, TA0); - d1[0] = Expand24to32(v2, TA0); - d1[1] = Expand24to32(v3, TA0); - } - - #endif - } - - template __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) - { - #if _M_SSE >= 0x501 - - const GSVector8i* s = (const GSVector8i*)src; - - GSVector8i TA0(TEXA.TA0 << 24); - GSVector8i TA1(TEXA.TA1 << 24); - - for(int i = 0; i < 4; i++, dst += dstpitch * 2) - { - GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask); - GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask); - - GSVector8i::sw128(v0, v1); - GSVector8i::sw32(v0, v1); - - GSVector8i* d0 = 
(GSVector8i*)&dst[dstpitch * 0]; - GSVector8i* d1 = (GSVector8i*)&dst[dstpitch * 1]; - - d0[0] = Expand16to32(v0.upl16(v0), TA0, TA1); - d0[1] = Expand16to32(v0.uph16(v0), TA0, TA1); - d1[0] = Expand16to32(v1.upl16(v1), TA0, TA1); - d1[1] = Expand16to32(v1.uph16(v1), TA0, TA1); - } - - #elif 0 // not faster - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i TA1(TEXA.TA1 << 24); - - for(int i = 0; i < 4; i++, dst += dstpitch * 2) - { - GSVector4i v0 = s[i * 4 + 0]; - GSVector4i v1 = s[i * 4 + 1]; - GSVector4i v2 = s[i * 4 + 2]; - GSVector4i v3 = s[i * 4 + 3]; - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw16(v0, v2, v1, v3); - - GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0]; - - d0[0] = Expand16to32(v0.upl16(v0), TA0, TA1); - d0[1] = Expand16to32(v0.uph16(v0), TA0, TA1); - d0[2] = Expand16to32(v1.upl16(v1), TA0, TA1); - d0[3] = Expand16to32(v1.uph16(v1), TA0, TA1); - - GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1]; - - d1[0] = Expand16to32(v2.upl16(v2), TA0, TA1); - d1[1] = Expand16to32(v2.uph16(v2), TA0, TA1); - d1[2] = Expand16to32(v3.upl16(v3), TA0, TA1); - d1[3] = Expand16to32(v3.uph16(v3), TA0, TA1); - } - - #else - - __aligned(uint16, 32) block[16 * 8]; - - ReadBlock16(src, (uint8*)block, sizeof(block) / 8); - - ExpandBlock16(block, dst, dstpitch, TEXA); - - #endif - } - - __forceinline static void ReadAndExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - //printf("ReadAndExpandBlock8_32\n"); - - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = m_r8mask; - - for(int i = 0; i < 2; i++) - { - v0 = s[i * 8 + 0].shuffle8(mask); - v1 = s[i * 8 + 1].shuffle8(mask); - v2 = s[i * 8 + 2].shuffle8(mask); - v3 = s[i * 8 + 3].shuffle8(mask); - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v3, v2); - - v0.gather32_8<>(pal, 
(GSVector4i*)dst); - dst += dstpitch; - v3.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v1.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v2.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - - v2 = s[i * 8 + 4].shuffle8(mask); - v3 = s[i * 8 + 5].shuffle8(mask); - v0 = s[i * 8 + 6].shuffle8(mask); - v1 = s[i * 8 + 7].shuffle8(mask); - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v3, v2); - - v0.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v3.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v1.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v2.gather32_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - } - - #else - - __aligned(uint8, 32) block[16 * 16]; - - ReadBlock8(src, (uint8*)block, sizeof(block) / 16); - - ExpandBlock8_32(block, dst, dstpitch, pal); - - #endif - } - - // TODO: ReadAndExpandBlock8_16 - - __forceinline static void ReadAndExpandBlock4_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal) - { - //printf("ReadAndExpandBlock4_32\n"); - - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - GSVector4i mask = m_r4mask; - - for(int i = 0; i < 2; i++) - { - v0 = s[i * 8 + 0].xzyw(); - v1 = s[i * 8 + 1].xzyw(); - v2 = s[i * 8 + 2].xzyw(); - v3 = s[i * 8 + 3].xzyw(); - - GSVector4i::sw64(v0, v1, v2, v3); - GSVector4i::sw4(v0, v2, v1, v3); - GSVector4i::sw8(v0, v1, v2, v3); - - v0 = v0.shuffle8(mask); - v1 = v1.shuffle8(mask); - v2 = v2.shuffle8(mask); - v3 = v3.shuffle8(mask); - - GSVector4i::sw16rh(v0, v1, v2, v3); - - v0.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v1.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v2.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v3.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - - v0 = s[i * 8 + 4].xzyw(); - v1 = s[i * 8 + 5].xzyw(); - v2 = s[i * 8 + 6].xzyw(); - v3 = s[i * 8 + 7].xzyw(); - - 
GSVector4i::sw64(v0, v1, v2, v3); - GSVector4i::sw4(v0, v2, v1, v3); - GSVector4i::sw8(v0, v1, v2, v3); - - v0 = v0.shuffle8(mask); - v1 = v1.shuffle8(mask); - v2 = v2.shuffle8(mask); - v3 = v3.shuffle8(mask); - - GSVector4i::sw16rl(v0, v1, v2, v3); - - v0.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v1.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v2.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - v3.gather64_8<>(pal, (GSVector4i*)dst); - dst += dstpitch; - } - - #else - - __aligned(uint8, 32) block[(32 / 2) * 16]; - - ReadBlock4(src, (uint8*)block, sizeof(block) / 16); - - ExpandBlock4_32(block, dst, dstpitch, pal); - - #endif - } - - // TODO: ReadAndExpandBlock4_16 - - __forceinline static void ReadAndExpandBlock8H_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - //printf("ReadAndExpandBlock8H_32\n"); - - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - (v0 >> 24).gather32_32<>(pal, (GSVector4i*)&dst[0]); - (v1 >> 24).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - - (v2 >> 24).gather32_32<>(pal, (GSVector4i*)&dst[0]); - (v3 >> 24).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - } - - #else - - __aligned(uint32, 32) block[8 * 8]; - - ReadBlock32(src, (uint8*)block, sizeof(block) / 8); - - ExpandBlock8H_32(block, dst, dstpitch, pal); - - #endif - } - - // TODO: ReadAndExpandBlock8H_16 - - __forceinline static void ReadAndExpandBlock4HL_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - //printf("ReadAndExpandBlock4HL_32\n"); - - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 
= s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - ((v0 >> 24) & 0xf).gather32_32<>(pal, (GSVector4i*)&dst[0]); - ((v1 >> 24) & 0xf).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - - ((v2 >> 24) & 0xf).gather32_32<>(pal, (GSVector4i*)&dst[0]); - ((v3 >> 24) & 0xf).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - } - - #else - - __aligned(uint32, 32) block[8 * 8]; - - ReadBlock32(src, (uint8*)block, sizeof(block) / 8); - - ExpandBlock4HL_32(block, dst, dstpitch, pal); - - #endif - } - - // TODO: ReadAndExpandBlock4HL_16 - - __forceinline static void ReadAndExpandBlock4HH_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) - { - //printf("ReadAndExpandBlock4HH_32\n"); - - #if _M_SSE >= 0x401 - - const GSVector4i* s = (const GSVector4i*)src; - - GSVector4i v0, v1, v2, v3; - - for(int i = 0; i < 4; i++) - { - v0 = s[i * 4 + 0]; - v1 = s[i * 4 + 1]; - v2 = s[i * 4 + 2]; - v3 = s[i * 4 + 3]; - - GSVector4i::sw64(v0, v1, v2, v3); - - (v0 >> 28).gather32_32<>(pal, (GSVector4i*)&dst[0]); - (v1 >> 28).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - - (v2 >> 28).gather32_32<>(pal, (GSVector4i*)&dst[0]); - (v3 >> 28).gather32_32<>(pal, (GSVector4i*)&dst[16]); - - dst += dstpitch; - } - - #else - - __aligned(uint32, 32) block[8 * 8]; - - ReadBlock32(src, (uint8*)block, sizeof(block) / 8); - - ExpandBlock4HH_32(block, dst, dstpitch, pal); - - #endif - } - - // TODO: ReadAndExpandBlock4HH_16 -}; diff --git a/plugins/GSdx_legacy/GSCapture.cpp b/plugins/GSdx_legacy/GSCapture.cpp deleted file mode 100644 index cebbb67680..0000000000 --- a/plugins/GSdx_legacy/GSCapture.cpp +++ /dev/null @@ -1,585 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software 
Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSCapture.h" -#include "GSPng.h" -#include "GSUtil.h" - -#ifdef _WIN32 - -class CPinInfo : public PIN_INFO { -public: - CPinInfo() { pFilter = NULL; } - ~CPinInfo() { if (pFilter) pFilter->Release(); } -}; - -class CFilterInfo : public FILTER_INFO { -public: - CFilterInfo() { pGraph = NULL; } - ~CFilterInfo() { if (pGraph) pGraph->Release(); } -}; - -#define BeginEnumFilters(pFilterGraph, pEnumFilters, pBaseFilter) \ - {CComPtr pEnumFilters; \ - if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \ - { \ - for(CComPtr pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \ - { \ - -#define EndEnumFilters }}} - -#define BeginEnumPins(pBaseFilter, pEnumPins, pPin) \ - {CComPtr pEnumPins; \ - if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \ - { \ - for(CComPtr pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \ - { \ - -#define EndEnumPins }}} - -// -// GSSource -// - -#ifdef __INTEL_COMPILER -interface __declspec(uuid("59C193BB-C520-41F3-BC1D-E245B80A86FA")) -#else -[uuid("59C193BB-C520-41F3-BC1D-E245B80A86FA")] interface -#endif -IGSSource : public IUnknown -{ - STDMETHOD(DeliverNewSegment)() PURE; - STDMETHOD(DeliverFrame)(const void* bits, int pitch, bool rgba) PURE; - STDMETHOD(DeliverEOS)() PURE; -}; - -#ifdef __INTEL_COMPILER -class 
__declspec(uuid("F8BB6F4F-0965-4ED4-BA74-C6A01E6E6C77")) -#else -[uuid("F8BB6F4F-0965-4ED4-BA74-C6A01E6E6C77")] class -#endif -GSSource : public CBaseFilter, private CCritSec, public IGSSource -{ - GSVector2i m_size; - REFERENCE_TIME m_atpf; - REFERENCE_TIME m_now; - - STDMETHODIMP NonDelegatingQueryInterface(REFIID riid, void** ppv) - { - return - riid == __uuidof(IGSSource) ? GetInterface((IGSSource*)this, ppv) : - __super::NonDelegatingQueryInterface(riid, ppv); - } - - class GSSourceOutputPin : public CBaseOutputPin - { - GSVector2i m_size; - vector m_mts; - - public: - GSSourceOutputPin(const GSVector2i& size, REFERENCE_TIME atpf, CBaseFilter* pFilter, CCritSec* pLock, HRESULT& hr, int colorspace) - : CBaseOutputPin("GSSourceOutputPin", pFilter, pLock, &hr, L"Output") - , m_size(size) - { - CMediaType mt; - mt.majortype = MEDIATYPE_Video; - mt.formattype = FORMAT_VideoInfo; - - VIDEOINFOHEADER vih; - memset(&vih, 0, sizeof(vih)); - vih.AvgTimePerFrame = atpf; - vih.bmiHeader.biSize = sizeof(vih.bmiHeader); - vih.bmiHeader.biWidth = m_size.x; - vih.bmiHeader.biHeight = m_size.y; - - // YUY2 - - mt.subtype = MEDIASUBTYPE_YUY2; - mt.lSampleSize = m_size.x * m_size.y * 2; - - vih.bmiHeader.biCompression = '2YUY'; - vih.bmiHeader.biPlanes = 1; - vih.bmiHeader.biBitCount = 16; - vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 2; - mt.SetFormat((uint8*)&vih, sizeof(vih)); - - m_mts.push_back(mt); - - // RGB32 - - mt.subtype = MEDIASUBTYPE_RGB32; - mt.lSampleSize = m_size.x * m_size.y * 4; - - vih.bmiHeader.biCompression = BI_RGB; - vih.bmiHeader.biPlanes = 1; - vih.bmiHeader.biBitCount = 32; - vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4; - mt.SetFormat((uint8*)&vih, sizeof(vih)); - - if(colorspace == 1) m_mts.insert(m_mts.begin(), mt); - else m_mts.push_back(mt); - } - - HRESULT GSSourceOutputPin::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties) - { - ASSERT(pAlloc && pProperties); - - HRESULT hr; - - pProperties->cBuffers = 1; - 
pProperties->cbBuffer = m_mt.lSampleSize; - - ALLOCATOR_PROPERTIES Actual; - - if(FAILED(hr = pAlloc->SetProperties(pProperties, &Actual))) - { - return hr; - } - - if(Actual.cbBuffer < pProperties->cbBuffer) - { - return E_FAIL; - } - - ASSERT(Actual.cBuffers == pProperties->cBuffers); - - return S_OK; - } - - HRESULT CheckMediaType(const CMediaType* pmt) - { - for(vector::iterator i = m_mts.begin(); i != m_mts.end(); i++) - { - if(i->majortype == pmt->majortype && i->subtype == pmt->subtype) - { - return S_OK; - } - } - - return E_FAIL; - } - - HRESULT GetMediaType(int i, CMediaType* pmt) - { - CheckPointer(pmt, E_POINTER); - - if(i < 0) return E_INVALIDARG; - if(i > 1) return VFW_S_NO_MORE_ITEMS; - - *pmt = m_mts[i]; - - return S_OK; - } - - STDMETHODIMP Notify(IBaseFilter* pSender, Quality q) - { - return E_NOTIMPL; - } - - const CMediaType& CurrentMediaType() - { - return m_mt; - } - }; - - GSSourceOutputPin* m_output; - -public: - - GSSource(int w, int h, float fps, IUnknown* pUnk, HRESULT& hr, int colorspace) - : CBaseFilter(NAME("GSSource"), pUnk, this, __uuidof(this), &hr) - , m_output(NULL) - , m_size(w, h) - , m_atpf((REFERENCE_TIME)(10000000.0f / fps)) - , m_now(0) - { - m_output = new GSSourceOutputPin(m_size, m_atpf, this, this, hr, colorspace); - } - - virtual ~GSSource() - { - delete m_output; - } - - DECLARE_IUNKNOWN; - - int GetPinCount() - { - return 1; - } - - CBasePin* GetPin(int n) - { - return n == 0 ? 
m_output : NULL; - } - - // IGSSource - - STDMETHODIMP DeliverNewSegment() - { - m_now = 0; - - return m_output->DeliverNewSegment(0, _I64_MAX, 1.0); - } - - STDMETHODIMP DeliverFrame(const void* bits, int pitch, bool rgba) - { - if(!m_output || !m_output->IsConnected()) - { - return E_UNEXPECTED; - } - - CComPtr sample; - - if(FAILED(m_output->GetDeliveryBuffer(&sample, NULL, NULL, 0))) - { - return E_FAIL; - } - - REFERENCE_TIME start = m_now; - REFERENCE_TIME stop = m_now + m_atpf; - - sample->SetTime(&start, &stop); - sample->SetSyncPoint(TRUE); - - const CMediaType& mt = m_output->CurrentMediaType(); - - uint8* src = (uint8*)bits; - uint8* dst = NULL; - - sample->GetPointer(&dst); - - int w = m_size.x; - int h = m_size.y; - int srcpitch = pitch; - - if(mt.subtype == MEDIASUBTYPE_YUY2) - { - int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2; - - GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f); - GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f); - GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f); - - if(!rgba) - { - ys = ys.zyxw(); - us = us.zyxw(); - vs = vs.zyxw(); - } - - const GSVector4 offset(16, 128, 16, 128); - - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) - { - uint32* s = (uint32*)src; - uint16* d = (uint16*)dst; - - for(int i = 0; i < w; i += 2) - { - GSVector4 c0 = GSVector4::rgba32(s[i + 0]); - GSVector4 c1 = GSVector4::rgba32(s[i + 1]); - GSVector4 c2 = c0 + c1; - - GSVector4 lo = (c0 * ys).hadd(c2 * us); - GSVector4 hi = (c1 * ys).hadd(c2 * vs); - - GSVector4 c = lo.hadd(hi) + offset; - - *((uint32*)&d[i]) = GSVector4i(c).rgba32(); - } - } - } - else if(mt.subtype == MEDIASUBTYPE_RGB32) - { - int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4; - - dst += dstpitch * (h - 1); - dstpitch = -dstpitch; - - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) - { - if(rgba) - { - #if _M_SSE >= 0x301 - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - 
GSVector4i mask(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); - - for(int i = 0, w4 = w >> 2; i < w4; i++) - { - d[i] = s[i].shuffle8(mask); - } - - #else - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - for(int i = 0, w4 = w >> 2; i < w4; i++) - { - d[i] = ((s[i] & 0x00ff0000) >> 16) | ((s[i] & 0x000000ff) << 16) | (s[i] & 0x0000ff00); - } - - #endif - } - else - { - memcpy(dst, src, w * 4); - } - } - } - else - { - return E_FAIL; - } - - if(FAILED(m_output->Deliver(sample))) - { - return E_FAIL; - } - - m_now = stop; - - return S_OK; - } - - STDMETHODIMP DeliverEOS() - { - return m_output->DeliverEndOfStream(); - } -}; - -static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir) -{ - if(!pBF) return(NULL); - - BeginEnumPins(pBF, pEP, pPin) - { - PIN_DIRECTION dir2; - pPin->QueryDirection(&dir2); - if(dir == dir2) - { - IPin* pRet = pPin.Detach(); - pRet->Release(); - return(pRet); - } - } - EndEnumPins - - return(NULL); -} - -#endif - -// -// GSCapture -// - -GSCapture::GSCapture() - : m_capturing(false), m_frame(0) - , m_out_dir("/tmp/GSdx_Capture") // FIXME Later add an option -{ - m_out_dir = theApp.GetConfig("capture_out_dir", "/tmp/GSdx_Capture"); - m_threads = theApp.GetConfig("capture_threads", 4); -#ifdef __linux__ - m_compression_level = theApp.GetConfig("png_compression_level", Z_BEST_SPEED); -#endif -} - -GSCapture::~GSCapture() -{ - EndCapture(); -} - -bool GSCapture::BeginCapture(float fps, GSVector2i recomendedResolution, float aspect) -{ - printf("Recomended resolution: %d x %d, DAR for muxing: %.4f\n", recomendedResolution.x, recomendedResolution.y, aspect); - std::lock_guard lock(m_lock); - - ASSERT(fps != 0); - - EndCapture(); - -#ifdef _WIN32 - - GSCaptureDlg dlg; - - if(IDOK != dlg.DoModal()) return false; - - m_size.x = (dlg.m_width + 7) & ~7; - m_size.y = (dlg.m_height + 7) & ~7; - - wstring fn(dlg.m_filename.begin(), dlg.m_filename.end()); - - // - - HRESULT hr; - - CComPtr cgb; - CComPtr mux; - - 
if(FAILED(hr = m_graph.CoCreateInstance(CLSID_FilterGraph)) - || FAILED(hr = cgb.CoCreateInstance(CLSID_CaptureGraphBuilder2)) - || FAILED(hr = cgb->SetFiltergraph(m_graph)) - || FAILED(hr = cgb->SetOutputFileName(&MEDIASUBTYPE_Avi, fn.c_str(), &mux, NULL))) - { - return false; - } - - m_src = new GSSource(m_size.x, m_size.y, fps, NULL, hr, dlg.m_colorspace); - - if (dlg.m_enc==0) - { - if (FAILED(hr = m_graph->AddFilter(m_src, L"Source"))) - return false; - if (FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL))) - return false; - } - else - { - if(FAILED(hr = m_graph->AddFilter(m_src, L"Source")) - || FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder"))) - { - return false; - } - - if(FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(dlg.m_enc, PINDIR_INPUT), NULL)) - || FAILED(hr = m_graph->ConnectDirect(GetFirstPin(dlg.m_enc, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL))) - { - return false; - } - } - - BeginEnumFilters(m_graph, pEF, pBF) - { - CFilterInfo fi; - pBF->QueryFilterInfo(&fi); - wstring s(fi.achName); - printf("Filter [%p]: %s\n", pBF.p, string(s.begin(), s.end()).c_str()); - - BeginEnumPins(pBF, pEP, pPin) - { - CComPtr pPinTo; - pPin->ConnectedTo(&pPinTo); - - CPinInfo pi; - pPin->QueryPinInfo(&pi); - wstring s(pi.achName); - printf("- Pin [%p - %p]: %s (%s)\n", pPin.p, pPinTo.p, string(s.begin(), s.end()).c_str(), pi.dir ? "out" : "in"); - } - EndEnumPins - } - EndEnumFilters - - hr = CComQIPtr(m_graph)->Run(); - - CComQIPtr(m_src)->DeliverNewSegment(); - -#elif __linux__ - // Note I think it doesn't support multiple depth creation - GSmkdir(m_out_dir.c_str()); - - // Really cheap recording - m_frame = 0; - // Add option !!! 
- m_size.x = theApp.GetConfig("capture_resx", 1280); - m_size.y = theApp.GetConfig("capture_resy", 1024); - - for(int i = 0; i < m_threads; i++) { - m_workers.push_back(new GSPng::Worker()); - } -#endif - - m_capturing = true; - - return true; -} - -bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba) -{ - std::lock_guard lock(m_lock); - - if(bits == NULL || pitch == 0) - { - ASSERT(0); - - return false; - } - -#ifdef _WIN32 - - if(m_src) - { - CComQIPtr(m_src)->DeliverFrame(bits, pitch, rgba); - - return true; - } - -#elif __linux__ - - std::string out_file = m_out_dir + format("/frame.%010d.png", m_frame); - //GSPng::Save(GSPng::RGB_PNG, out_file, (uint8*)bits, m_size.x, m_size.y, pitch, m_compression_level); - m_workers[m_frame%m_threads]->Push(shared_ptr(new GSPng::Transaction(GSPng::RGB_PNG, out_file, static_cast(bits), m_size.x, m_size.y, pitch, m_compression_level))); - - m_frame++; - -#endif - - return false; -} - -bool GSCapture::EndCapture() -{ - std::lock_guard lock(m_lock); - -#ifdef _WIN32 - - if(m_src) - { - CComQIPtr(m_src)->DeliverEOS(); - - m_src = NULL; - } - - if(m_graph) - { - CComQIPtr(m_graph)->Stop(); - - m_graph = NULL; - } - -#elif __linux__ - for(size_t i = 0; i < m_workers.size(); i++) { - m_workers[i]->Wait(); - } - - m_frame = 0; - -#endif - - m_capturing = false; - - return true; -} diff --git a/plugins/GSdx_legacy/GSCapture.h b/plugins/GSdx_legacy/GSCapture.h deleted file mode 100644 index 3fdb7afde1..0000000000 --- a/plugins/GSdx_legacy/GSCapture.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSVector.h" -#include "GSPng.h" - -#ifdef _WIN32 -#include "GSCaptureDlg.h" -#endif - -class GSCapture -{ - std::recursive_mutex m_lock; - bool m_capturing; - GSVector2i m_size; - uint64 m_frame; - std::string m_out_dir; - int m_threads; - - #ifdef _WIN32 - - CComPtr m_graph; - CComPtr m_src; - - #elif __linux__ - - vector m_workers; - int m_compression_level; - - #endif - -public: - GSCapture(); - virtual ~GSCapture(); - - bool BeginCapture(float fps, GSVector2i recomendedResolution, float aspect); - bool DeliverFrame(const void* bits, int pitch, bool rgba); - bool EndCapture(); - - bool IsCapturing() {return m_capturing;} - GSVector2i GetSize() {return m_size;} -}; diff --git a/plugins/GSdx_legacy/GSCaptureDlg.cpp b/plugins/GSdx_legacy/GSCaptureDlg.cpp deleted file mode 100644 index 0e3d187f28..0000000000 --- a/plugins/GSdx_legacy/GSCaptureDlg.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSCaptureDlg.h" - -#define BeginEnumSysDev(clsid, pMoniker) \ - {CComPtr pDevEnum4$##clsid; \ - pDevEnum4$##clsid.CoCreateInstance(CLSID_SystemDeviceEnum); \ - CComPtr pClassEnum4$##clsid; \ - if(SUCCEEDED(pDevEnum4$##clsid->CreateClassEnumerator(clsid, &pClassEnum4$##clsid, 0)) \ - && pClassEnum4$##clsid) \ - { \ - for(CComPtr pMoniker; pClassEnum4$##clsid->Next(1, &pMoniker, 0) == S_OK; pMoniker = NULL) \ - { \ - -#define EndEnumSysDev }}} - -GSCaptureDlg::GSCaptureDlg() - : GSDialog(IDD_CAPTURE) -{ - m_width = theApp.GetConfig("CaptureWidth", 640); - m_height = theApp.GetConfig("CaptureHeight", 480); - m_filename = theApp.GetConfig("CaptureFileName", ""); -} - -int GSCaptureDlg::GetSelCodec(Codec& c) -{ - INT_PTR data = 0; - - if(ComboBoxGetSelData(IDC_CODECS, data)) - { - if(data == 0) return 2; - - c = *(Codec*)data; - - if(!c.filter) - { - c.moniker->BindToObject(NULL, NULL, __uuidof(IBaseFilter), (void**)&c.filter); - - if(!c.filter) return 0; - } - - return 1; - } - - return 0; -} - -void GSCaptureDlg::OnInit() -{ - __super::OnInit(); - - SetTextAsInt(IDC_WIDTH, m_width); - SetTextAsInt(IDC_HEIGHT, m_height); - SetText(IDC_FILENAME, m_filename.c_str()); - - m_codecs.clear(); - - _bstr_t selected = theApp.GetConfig("CaptureVideoCodecDisplayName", "").c_str(); - - ComboBoxAppend(IDC_CODECS, "Uncompressed", 0, true); - - ComboBoxAppend(IDC_COLORSPACE, "YUY2", 0, true); - ComboBoxAppend(IDC_COLORSPACE, "RGB32", 1, false); - - CoInitialize(0); // this is obviously wrong here, each thread should call this on start, and where is CoUninitalize? 
- - BeginEnumSysDev(CLSID_VideoCompressorCategory, moniker) - { - Codec c; - - c.moniker = moniker; - - wstring prefix; - - LPOLESTR str = NULL; - - if(FAILED(moniker->GetDisplayName(NULL, NULL, &str))) - continue; - - if(wcsstr(str, L"@device:dmo:")) prefix = L"(DMO) "; - else if(wcsstr(str, L"@device:sw:")) prefix = L"(DS) "; - else if(wcsstr(str, L"@device:cm:")) prefix = L"(VfW) "; - - c.DisplayName = str; - - CoTaskMemFree(str); - - CComPtr pPB; - - if(FAILED(moniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pPB))) - continue; - - _variant_t var; - - if(FAILED(pPB->Read(_bstr_t(_T("FriendlyName")), &var, NULL))) - continue; - - c.FriendlyName = prefix + var.bstrVal; - - m_codecs.push_back(c); - - string s(c.FriendlyName.begin(), c.FriendlyName.end()); - - ComboBoxAppend(IDC_CODECS, s.c_str(), (LPARAM)&m_codecs.back(), c.DisplayName == selected); - } - EndEnumSysDev -} - -bool GSCaptureDlg::OnCommand(HWND hWnd, UINT id, UINT code) -{ - if(id == IDC_BROWSE && code == BN_CLICKED) - { - char buff[MAX_PATH] = {0}; - - OPENFILENAME ofn; - - memset(&ofn, 0, sizeof(ofn)); - - ofn.lStructSize = sizeof(ofn); - ofn.hwndOwner = m_hWnd; - ofn.lpstrFile = buff; - ofn.nMaxFile = countof(buff); - ofn.lpstrFilter = "Avi files (*.avi)\0*.avi\0"; - ofn.Flags = OFN_EXPLORER | OFN_ENABLESIZING | OFN_HIDEREADONLY | OFN_OVERWRITEPROMPT | OFN_PATHMUSTEXIST; - - strcpy(ofn.lpstrFile, m_filename.c_str()); - - if(GetSaveFileName(&ofn)) - { - m_filename = ofn.lpstrFile; - - SetText(IDC_FILENAME, m_filename.c_str()); - } - - return true; - } - else if(id == IDC_CONFIGURE && code == BN_CLICKED) - { - Codec c; - - if(GetSelCodec(c) == 1) - { - if(CComQIPtr pSPP = c.filter) - { - CAUUID caGUID; - - memset(&caGUID, 0, sizeof(caGUID)); - - if(SUCCEEDED(pSPP->GetPages(&caGUID))) - { - IUnknown* lpUnk = NULL; - pSPP.QueryInterface(&lpUnk); - OleCreatePropertyFrame(m_hWnd, 0, 0, c.FriendlyName.c_str(), 1, (IUnknown**)&lpUnk, caGUID.cElems, caGUID.pElems, 0, 0, NULL); - lpUnk->Release(); - - 
if(caGUID.pElems) CoTaskMemFree(caGUID.pElems); - } - } - else if(CComQIPtr pAMVfWCD = c.filter) - { - if(pAMVfWCD->ShowDialog(VfwCompressDialog_QueryConfig, NULL) == S_OK) - { - pAMVfWCD->ShowDialog(VfwCompressDialog_Config, m_hWnd); - } - } - } - - return true; - } - else if(id == IDOK) - { - m_width = GetTextAsInt(IDC_WIDTH); - m_height = GetTextAsInt(IDC_HEIGHT); - m_filename = GetText(IDC_FILENAME); - ComboBoxGetSelData(IDC_COLORSPACE, (INT_PTR)m_colorspace); - - Codec c; - - int ris = GetSelCodec(c); - if(ris == 0) - { - return false; - } - - m_enc = c.filter; - - theApp.SetConfig("CaptureWidth", m_width); - theApp.SetConfig("CaptureHeight", m_height); - theApp.SetConfig("CaptureFileName", m_filename.c_str()); - - if (ris != 2) - { - theApp.SetConfig("CaptureVideoCodecDisplayName", c.DisplayName); - } - else - { - theApp.SetConfig("CaptureVideoCodecDisplayName", ""); - } - } - - return __super::OnCommand(hWnd, id, code); -} diff --git a/plugins/GSdx_legacy/GSCaptureDlg.h b/plugins/GSdx_legacy/GSCaptureDlg.h deleted file mode 100644 index 3efb3d5964..0000000000 --- a/plugins/GSdx_legacy/GSCaptureDlg.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDialog.h" -#include "resource.h" -#include - -class GSCaptureDlg : public GSDialog -{ - struct Codec - { - CComPtr moniker; - CComPtr filter; - wstring FriendlyName; - _bstr_t DisplayName; - }; - - list m_codecs; - - int GetSelCodec(Codec& c); - -protected: - void OnInit(); - bool OnCommand(HWND hWnd, UINT id, UINT code); - -public: - GSCaptureDlg(); - - int m_width; - int m_height; - string m_filename; - int m_colorspace; - CComPtr m_enc; -}; diff --git a/plugins/GSdx_legacy/GSClut.cpp b/plugins/GSdx_legacy/GSClut.cpp deleted file mode 100644 index a7eab9a4b2..0000000000 --- a/plugins/GSdx_legacy/GSClut.cpp +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSClut.h" -#include "GSLocalMemory.h" - -#define CLUT_ALLOC_SIZE (2 * 4096) - -GSClut::GSClut(GSLocalMemory* mem) - : m_mem(mem) -{ - uint8* p = (uint8*)vmalloc(CLUT_ALLOC_SIZE, false); - - m_clut = (uint16*)&p[0]; // 1k + 1k for mirrored area simulating wrapping memory - m_buff32 = (uint32*)&p[2048]; // 1k - m_buff64 = (uint64*)&p[4096]; // 2k - m_write.dirty = true; - m_read.dirty = true; - - for(int i = 0; i < 16; i++) - { - for(int j = 0; j < 64; j++) - { - m_wc[0][i][j] = &GSClut::WriteCLUT_NULL; - m_wc[1][i][j] = &GSClut::WriteCLUT_NULL; - } - } - - m_wc[0][PSM_PSMCT32][PSM_PSMT8] = &GSClut::WriteCLUT32_I8_CSM1; - m_wc[0][PSM_PSMCT32][PSM_PSMT8H] = &GSClut::WriteCLUT32_I8_CSM1; - m_wc[0][PSM_PSMCT32][PSM_PSMT4] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT32][PSM_PSMT4HL] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT32][PSM_PSMT4HH] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT24][PSM_PSMT8] = &GSClut::WriteCLUT32_I8_CSM1; - m_wc[0][PSM_PSMCT24][PSM_PSMT8H] = &GSClut::WriteCLUT32_I8_CSM1; - m_wc[0][PSM_PSMCT24][PSM_PSMT4] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT24][PSM_PSMT4HL] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT24][PSM_PSMT4HH] = &GSClut::WriteCLUT32_I4_CSM1; - m_wc[0][PSM_PSMCT16][PSM_PSMT8] = &GSClut::WriteCLUT16_I8_CSM1; - m_wc[0][PSM_PSMCT16][PSM_PSMT8H] = &GSClut::WriteCLUT16_I8_CSM1; - m_wc[0][PSM_PSMCT16][PSM_PSMT4] = &GSClut::WriteCLUT16_I4_CSM1; - m_wc[0][PSM_PSMCT16][PSM_PSMT4HL] = &GSClut::WriteCLUT16_I4_CSM1; - m_wc[0][PSM_PSMCT16][PSM_PSMT4HH] = &GSClut::WriteCLUT16_I4_CSM1; - m_wc[0][PSM_PSMCT16S][PSM_PSMT8] = &GSClut::WriteCLUT16S_I8_CSM1; - m_wc[0][PSM_PSMCT16S][PSM_PSMT8H] = &GSClut::WriteCLUT16S_I8_CSM1; - m_wc[0][PSM_PSMCT16S][PSM_PSMT4] = &GSClut::WriteCLUT16S_I4_CSM1; - m_wc[0][PSM_PSMCT16S][PSM_PSMT4HL] = &GSClut::WriteCLUT16S_I4_CSM1; - m_wc[0][PSM_PSMCT16S][PSM_PSMT4HH] = &GSClut::WriteCLUT16S_I4_CSM1; - - 
m_wc[1][PSM_PSMCT32][PSM_PSMT8] = &GSClut::WriteCLUT32_CSM2<256>; - m_wc[1][PSM_PSMCT32][PSM_PSMT8H] = &GSClut::WriteCLUT32_CSM2<256>; - m_wc[1][PSM_PSMCT32][PSM_PSMT4] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT32][PSM_PSMT4HL] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT32][PSM_PSMT4HH] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT24][PSM_PSMT8] = &GSClut::WriteCLUT32_CSM2<256>; - m_wc[1][PSM_PSMCT24][PSM_PSMT8H] = &GSClut::WriteCLUT32_CSM2<256>; - m_wc[1][PSM_PSMCT24][PSM_PSMT4] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT24][PSM_PSMT4HL] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT24][PSM_PSMT4HH] = &GSClut::WriteCLUT32_CSM2<16>; - m_wc[1][PSM_PSMCT16][PSM_PSMT8] = &GSClut::WriteCLUT16_CSM2<256>; - m_wc[1][PSM_PSMCT16][PSM_PSMT8H] = &GSClut::WriteCLUT16_CSM2<256>; - m_wc[1][PSM_PSMCT16][PSM_PSMT4] = &GSClut::WriteCLUT16_CSM2<16>; - m_wc[1][PSM_PSMCT16][PSM_PSMT4HL] = &GSClut::WriteCLUT16_CSM2<16>; - m_wc[1][PSM_PSMCT16][PSM_PSMT4HH] = &GSClut::WriteCLUT16_CSM2<16>; - m_wc[1][PSM_PSMCT16S][PSM_PSMT8] = &GSClut::WriteCLUT16S_CSM2<256>; - m_wc[1][PSM_PSMCT16S][PSM_PSMT8H] = &GSClut::WriteCLUT16S_CSM2<256>; - m_wc[1][PSM_PSMCT16S][PSM_PSMT4] = &GSClut::WriteCLUT16S_CSM2<16>; - m_wc[1][PSM_PSMCT16S][PSM_PSMT4HL] = &GSClut::WriteCLUT16S_CSM2<16>; - m_wc[1][PSM_PSMCT16S][PSM_PSMT4HH] = &GSClut::WriteCLUT16S_CSM2<16>; -} - -GSClut::~GSClut() -{ - vmfree(m_clut, CLUT_ALLOC_SIZE); -} - -void GSClut::Invalidate() -{ - m_write.dirty = true; -} - -bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - switch(TEX0.CLD) - { - case 0: return false; - case 1: break; - case 2: m_CBP[0] = TEX0.CBP; break; - case 3: m_CBP[1] = TEX0.CBP; break; - case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break; - case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break; - case 6: ASSERT(0); return false; // ffx2 menu - case 7: ASSERT(0); return false; // ford mustang racing // Bouken Jidai 
Katsugeki Goemon - default: __assume(0); - } - - return m_write.IsDirty(TEX0, TEXCLUT); -} - -void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - m_write.TEX0 = TEX0; - m_write.TEXCLUT = TEXCLUT; - m_write.dirty = false; - m_read.dirty = true; - - (this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT); - - // Mirror write to other half of buffer to simulate wrapping memory - - int offset = (TEX0.CSA & (TEX0.CPSM < PSM_PSMCT16 ? 15 : 31)) * 16; - - if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H) - { - int size = TEX0.CPSM < PSM_PSMCT16 ? 512 : 256; - - memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * min(size, 512 - offset)); - memcpy(m_clut, m_clut + 512, sizeof(*m_clut) * max(0, size + offset - 512)); - } - else - { - int size = 16; - - memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * size); - - if(TEX0.CPSM < PSM_PSMCT16) - { - memcpy(m_clut + 512 + 256 + offset, m_clut + 256 + offset, sizeof(*m_clut) * size); - } - } -} - -void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - ALIGN_STACK(32); - - WriteCLUT_T32_I8_CSM1((uint32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + ((TEX0.CSA & 15) << 4)); -} - -void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - ALIGN_STACK(32); - - WriteCLUT_T32_I4_CSM1((uint32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + ((TEX0.CSA & 15) << 4)); -} - -void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - WriteCLUT_T16_I8_CSM1((uint16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4)); -} - -void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - WriteCLUT_T16_I4_CSM1((uint16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4)); -} - -void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - WriteCLUT_T16_I8_CSM1((uint16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 
1), m_clut + (TEX0.CSA << 4)); -} - -void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - WriteCLUT_T16_I4_CSM1((uint16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4)); -} - -template void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32); - - uint32* RESTRICT s = &m_mem->m_vm32[off->pixel.row[TEXCLUT.COV]]; - int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4]; - - uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4); - - for(int i = 0; i < n; i++) - { - uint32 c = s[col[i]]; - - clut[i] = (uint16)(c & 0xffff); - clut[i + 256] = (uint16)(c >> 16); - } -} - -template void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16); - - uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]]; - int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4]; - - uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); - - for(int i = 0; i < n; i++) - { - clut[i] = s[col[i]]; - } -} - -template void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S); - - uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]]; - int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4]; - - uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); - - for(int i = 0; i < n; i++) - { - clut[i] = s[col[i]]; - } -} - -#if 0 -void GSClut::Read(const GIFRegTEX0& TEX0) -{ - if(m_read.IsDirty(TEX0)) - { - m_read.TEX0 = TEX0; - m_read.dirty = false; - - uint16* clut = m_clut; - - if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) - { - switch(TEX0.PSM) - { - case PSM_PSMT8: - case PSM_PSMT8H: - clut += (TEX0.CSA & 15) << 4; - ReadCLUT_T32_I8(clut, m_buff32); - break; - case PSM_PSMT4: - case PSM_PSMT4HL: - case PSM_PSMT4HH: - clut 
+= (TEX0.CSA & 15) << 4; - ReadCLUT_T32_I4(clut, m_buff32, m_buff64); - break; - } - } - else if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) - { - switch(TEX0.PSM) - { - case PSM_PSMT8: - case PSM_PSMT8H: - clut += TEX0.CSA << 4; - ReadCLUT_T16_I8(clut, m_buff32); - break; - case PSM_PSMT4: - case PSM_PSMT4HL: - case PSM_PSMT4HH: - clut += TEX0.CSA << 4; - ReadCLUT_T16_I4(clut, m_buff32, m_buff64); - break; - } - } - } -} -#endif - -void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) -{ - if(m_read.IsDirty(TEX0, TEXA)) - { - m_read.TEX0 = TEX0; - m_read.TEXA = TEXA; - m_read.dirty = false; - m_read.adirty = true; - - uint16* clut = m_clut; - - if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) - { - switch(TEX0.PSM) - { - case PSM_PSMT8: - case PSM_PSMT8H: - clut += (TEX0.CSA & 15) << 4; // disney golf title screen - ReadCLUT_T32_I8(clut, m_buff32); - break; - case PSM_PSMT4: - case PSM_PSMT4HL: - case PSM_PSMT4HH: - clut += (TEX0.CSA & 15) << 4; - // TODO: merge these functions - ReadCLUT_T32_I4(clut, m_buff32); - ExpandCLUT64_T32_I8(m_buff32, (uint64*)m_buff64); // sw renderer does not need m_buff64 anymore - break; - } - } - else if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) - { - switch(TEX0.PSM) - { - case PSM_PSMT8: - case PSM_PSMT8H: - clut += TEX0.CSA << 4; - Expand16(clut, m_buff32, 256, TEXA); - break; - case PSM_PSMT4: - case PSM_PSMT4HL: - case PSM_PSMT4HH: - clut += TEX0.CSA << 4; - // TODO: merge these functions - Expand16(clut, m_buff32, 16, TEXA); - ExpandCLUT64_T32_I8(m_buff32, (uint64*)m_buff64); // sw renderer does not need m_buff64 anymore - break; - } - } - } -} - -void GSClut::GetAlphaMinMax32(int& amin, int& amax) -{ - // call only after Read32 - - ASSERT(!m_read.dirty); - - if(m_read.adirty) - { - m_read.adirty = false; - - if(GSLocalMemory::m_psm[m_read.TEX0.CPSM].trbpp == 24 && m_read.TEXA.AEM == 0) - { - m_read.amin = m_read.TEXA.TA0; - m_read.amax = m_read.TEXA.TA0; - } - else - { - const 
GSVector4i* p = (const GSVector4i*)m_buff32; - - GSVector4i amin, amax; - - if(GSLocalMemory::m_psm[m_read.TEX0.PSM].pal == 256) - { - amin = GSVector4i::xffffffff(); - amax = GSVector4i::zero(); - - for(int i = 0; i < 16; i++) - { - GSVector4i v0 = (p[i * 4 + 0] >> 24).ps32(p[i * 4 + 1] >> 24); - GSVector4i v1 = (p[i * 4 + 2] >> 24).ps32(p[i * 4 + 3] >> 24); - GSVector4i v2 = v0.pu16(v1); - - amin = amin.min_u8(v2); - amax = amax.max_u8(v2); - } - } - else - { - ASSERT(GSLocalMemory::m_psm[m_read.TEX0.PSM].pal == 16); - - GSVector4i v0 = (p[0] >> 24).ps32(p[1] >> 24); - GSVector4i v1 = (p[2] >> 24).ps32(p[3] >> 24); - GSVector4i v2 = v0.pu16(v1); - - amin = v2; - amax = v2; - } - - amin = amin.min_u8(amin.zwxy()); - amax = amax.max_u8(amax.zwxy()); - amin = amin.min_u8(amin.zwxyl()); - amax = amax.max_u8(amax.zwxyl()); - amin = amin.min_u8(amin.yxwzl()); - amax = amax.max_u8(amax.yxwzl()); - - GSVector4i v0 = amin.upl8(amax).u8to16(); - GSVector4i v1 = v0.yxwz(); - - m_read.amin = v0.min_i16(v1).extract16<0>(); - m_read.amax = v0.max_i16(v1).extract16<1>(); - } - } - - amin = m_read.amin; - amax = m_read.amax; -} - -// - -void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut) -{ - // 4 blocks - - for(int i = 0; i < 64; i += 16) - { - WriteCLUT_T32_I4_CSM1(&src[i + 0], &clut[i * 2 + 0]); - WriteCLUT_T32_I4_CSM1(&src[i + 64], &clut[i * 2 + 16]); - WriteCLUT_T32_I4_CSM1(&src[i + 128], &clut[i * 2 + 128]); - WriteCLUT_T32_I4_CSM1(&src[i + 192], &clut[i * 2 + 144]); - } -} - -__forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut) -{ - // 1 block - - #if _M_SSE >= 0x501 - - GSVector8i* s = (GSVector8i*)src; - GSVector8i* d = (GSVector8i*)clut; - - GSVector8i v0 = s[0].acbd(); - GSVector8i v1 = s[1].acbd(); - - GSVector8i::sw16(v0, v1); - GSVector8i::sw16(v0, v1); - GSVector8i::sw16(v0, v1); - - d[0] = v0; - d[16] = v1; - - #else - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = 
(GSVector4i*)clut; - - GSVector4i v0 = s[0]; - GSVector4i v1 = s[1]; - GSVector4i v2 = s[2]; - GSVector4i v3 = s[3]; - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw16(v0, v2, v1, v3); - - d[0] = v0; - d[1] = v2; - d[32] = v1; - d[33] = v3; - - #endif -} - -void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut) -{ - // 2 blocks - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)clut; - - for(int i = 0; i < 32; i += 4) - { - GSVector4i v0 = s[i + 0]; - GSVector4i v1 = s[i + 1]; - GSVector4i v2 = s[i + 2]; - GSVector4i v3 = s[i + 3]; - - GSVector4i::sw16(v0, v1, v2, v3); - GSVector4i::sw32(v0, v1, v2, v3); - GSVector4i::sw16(v0, v2, v1, v3); - - d[i + 0] = v0; - d[i + 1] = v2; - d[i + 2] = v1; - d[i + 3] = v3; - } -} - -__forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut) -{ - // 1 block (half) - - for(int i = 0; i < 16; i++) - { - clut[i] = src[clutTableT16I4[i]]; - } -} - -void GSClut::ReadCLUT_T32_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst) -{ - for(int i = 0; i < 256; i += 16) - { - ReadCLUT_T32_I4(&clut[i], &dst[i]); - } -} - -__forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst) -{ - GSVector4i* s = (GSVector4i*)clut; - GSVector4i* d = (GSVector4i*)dst; - - GSVector4i v0 = s[0]; - GSVector4i v1 = s[1]; - GSVector4i v2 = s[32]; - GSVector4i v3 = s[33]; - - GSVector4i::sw16(v0, v2, v1, v3); - - d[0] = v0; - d[1] = v1; - d[2] = v2; - d[3] = v3; -} - -#if 0 -__forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64) -{ - GSVector4i* s = (GSVector4i*)clut; - GSVector4i* d32 = (GSVector4i*)dst32; - GSVector4i* d64 = (GSVector4i*)dst64; - - GSVector4i s0 = s[0]; - GSVector4i s1 = s[1]; - GSVector4i s2 = s[32]; - GSVector4i s3 = s[33]; - - GSVector4i::sw16(s0, s2, s1, s3); - - d32[0] = s0; - d32[1] = s1; - d32[2] = s2; - 
d32[3] = s3; - - ExpandCLUT64_T32(s0, s0, s1, s2, s3, &d64[0]); - ExpandCLUT64_T32(s1, s0, s1, s2, s3, &d64[32]); - ExpandCLUT64_T32(s2, s0, s1, s2, s3, &d64[64]); - ExpandCLUT64_T32(s3, s0, s1, s2, s3, &d64[96]); -} -#endif - -#if 0 -void GSClut::ReadCLUT_T16_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst) -{ - for(int i = 0; i < 256; i += 16) - { - ReadCLUT_T16_I4(&clut[i], &dst[i]); - } -} -#endif - -#if 0 -__forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst) -{ - GSVector4i* s = (GSVector4i*)clut; - GSVector4i* d = (GSVector4i*)dst; - - GSVector4i v0 = s[0]; - GSVector4i v1 = s[1]; - - d[0] = v0.upl16(); - d[1] = v0.uph16(); - d[2] = v1.upl16(); - d[3] = v1.uph16(); -} -#endif - -#if 0 -__forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64) -{ - GSVector4i* s = (GSVector4i*)clut; - GSVector4i* d32 = (GSVector4i*)dst32; - GSVector4i* d64 = (GSVector4i*)dst64; - - GSVector4i v0 = s[0]; - GSVector4i v1 = s[1]; - - GSVector4i s0 = v0.upl16(); - GSVector4i s1 = v0.uph16(); - GSVector4i s2 = v1.upl16(); - GSVector4i s3 = v1.uph16(); - - d32[0] = s0; - d32[1] = s1; - d32[2] = s2; - d32[3] = s3; - - ExpandCLUT64_T16(s0, s0, s1, s2, s3, &d64[0]); - ExpandCLUT64_T16(s1, s0, s1, s2, s3, &d64[32]); - ExpandCLUT64_T16(s2, s0, s1, s2, s3, &d64[64]); - ExpandCLUT64_T16(s3, s0, s1, s2, s3, &d64[96]); -} -#endif - -void GSClut::ExpandCLUT64_T32_I8(const uint32* RESTRICT src, uint64* RESTRICT dst) -{ - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - GSVector4i s0 = s[0]; - GSVector4i s1 = s[1]; - GSVector4i s2 = s[2]; - GSVector4i s3 = s[3]; - - ExpandCLUT64_T32(s0, s0, s1, s2, s3, &d[0]); - ExpandCLUT64_T32(s1, s0, s1, s2, s3, &d[32]); - ExpandCLUT64_T32(s2, s0, s1, s2, s3, &d[64]); - ExpandCLUT64_T32(s3, s0, s1, s2, s3, &d[96]); -} - -__forceinline void GSClut::ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& 
lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst) -{ - ExpandCLUT64_T32(hi.xxxx(), lo0, &dst[0]); - ExpandCLUT64_T32(hi.xxxx(), lo1, &dst[2]); - ExpandCLUT64_T32(hi.xxxx(), lo2, &dst[4]); - ExpandCLUT64_T32(hi.xxxx(), lo3, &dst[6]); - ExpandCLUT64_T32(hi.yyyy(), lo0, &dst[8]); - ExpandCLUT64_T32(hi.yyyy(), lo1, &dst[10]); - ExpandCLUT64_T32(hi.yyyy(), lo2, &dst[12]); - ExpandCLUT64_T32(hi.yyyy(), lo3, &dst[14]); - ExpandCLUT64_T32(hi.zzzz(), lo0, &dst[16]); - ExpandCLUT64_T32(hi.zzzz(), lo1, &dst[18]); - ExpandCLUT64_T32(hi.zzzz(), lo2, &dst[20]); - ExpandCLUT64_T32(hi.zzzz(), lo3, &dst[22]); - ExpandCLUT64_T32(hi.wwww(), lo0, &dst[24]); - ExpandCLUT64_T32(hi.wwww(), lo1, &dst[26]); - ExpandCLUT64_T32(hi.wwww(), lo2, &dst[28]); - ExpandCLUT64_T32(hi.wwww(), lo3, &dst[30]); -} - -__forceinline void GSClut::ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst) -{ - dst[0] = lo.upl32(hi); - dst[1] = lo.uph32(hi); -} - -#if 0 -void GSClut::ExpandCLUT64_T16_I8(const uint32* RESTRICT src, uint64* RESTRICT dst) -{ - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - GSVector4i s0 = s[0]; - GSVector4i s1 = s[1]; - GSVector4i s2 = s[2]; - GSVector4i s3 = s[3]; - - ExpandCLUT64_T16(s0, s0, s1, s2, s3, &d[0]); - ExpandCLUT64_T16(s1, s0, s1, s2, s3, &d[32]); - ExpandCLUT64_T16(s2, s0, s1, s2, s3, &d[64]); - ExpandCLUT64_T16(s3, s0, s1, s2, s3, &d[96]); -} -#endif - -__forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst) -{ - ExpandCLUT64_T16(hi.xxxx(), lo0, &dst[0]); - ExpandCLUT64_T16(hi.xxxx(), lo1, &dst[2]); - ExpandCLUT64_T16(hi.xxxx(), lo2, &dst[4]); - ExpandCLUT64_T16(hi.xxxx(), lo3, &dst[6]); - ExpandCLUT64_T16(hi.yyyy(), lo0, &dst[8]); - ExpandCLUT64_T16(hi.yyyy(), lo1, &dst[10]); - ExpandCLUT64_T16(hi.yyyy(), lo2, &dst[12]); - ExpandCLUT64_T16(hi.yyyy(), lo3, &dst[14]); - 
ExpandCLUT64_T16(hi.zzzz(), lo0, &dst[16]); - ExpandCLUT64_T16(hi.zzzz(), lo1, &dst[18]); - ExpandCLUT64_T16(hi.zzzz(), lo2, &dst[20]); - ExpandCLUT64_T16(hi.zzzz(), lo3, &dst[22]); - ExpandCLUT64_T16(hi.wwww(), lo0, &dst[24]); - ExpandCLUT64_T16(hi.wwww(), lo1, &dst[26]); - ExpandCLUT64_T16(hi.wwww(), lo2, &dst[28]); - ExpandCLUT64_T16(hi.wwww(), lo3, &dst[30]); -} - -__forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst) -{ - dst[0] = lo.upl16(hi); - dst[1] = lo.uph16(hi); -} - -// TODO - -static const GSVector4i s_bm(0x00007c00); -static const GSVector4i s_gm(0x000003e0); -static const GSVector4i s_rm(0x0000001f); - -void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA) -{ - ASSERT((w & 7) == 0); - - const GSVector4i rm = s_rm; - const GSVector4i gm = s_gm; - const GSVector4i bm = s_bm; - - GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i TA1(TEXA.TA1 << 24); - - GSVector4i c, cl, ch; - - const GSVector4i* s = (const GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - if(!TEXA.AEM) - { - for(int i = 0, j = w >> 3; i < j; i++) - { - c = s[i]; - cl = c.upl16(c); - ch = c.uph16(c); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)); - } - } - else - { - for(int i = 0, j = w >> 3; i < j; i++) - { - c = s[i]; - cl = c.upl16(c); - ch = c.uph16(c); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero()); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)).andnot(ch == GSVector4i::zero()); - } - } -} - -// - -bool GSClut::WriteState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) -{ - return dirty || !GSVector4i::load(this).eq(GSVector4i::load(&TEX0, &TEXCLUT)); -} - 
-bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0) -{ - return dirty || !GSVector4i::load(this).eq(GSVector4i::load(&TEX0, &this->TEXA)); -} - -bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) -{ - return dirty || !GSVector4i::load(this).eq(GSVector4i::load(&TEX0, &TEXA)); -} diff --git a/plugins/GSdx_legacy/GSClut.h b/plugins/GSdx_legacy/GSClut.h deleted file mode 100644 index 171e5e1452..0000000000 --- a/plugins/GSdx_legacy/GSClut.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSVector.h" -#include "GSTables.h" -#include "GSAlignedClass.h" - -class GSLocalMemory; - -__aligned(class, 32) GSClut : public GSAlignedClass<32> -{ - GSLocalMemory* m_mem; - - uint32 m_CBP[2]; - uint16* m_clut; - uint32* m_buff32; - uint64* m_buff64; - - __aligned(struct, 32) WriteState - { - GIFRegTEX0 TEX0; - GIFRegTEXCLUT TEXCLUT; - bool dirty; - bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - } m_write; - - __aligned(struct, 32) ReadState - { - GIFRegTEX0 TEX0; - GIFRegTEXA TEXA; - bool dirty; - bool adirty; - int amin, amax; - bool IsDirty(const GIFRegTEX0& TEX0); - bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); - } m_read; - - typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - - writeCLUT m_wc[2][16][64]; - - void WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - - template void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - template void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - template void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - - void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {} // xenosaga 3, bios - - static void WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); - static void WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); - static void WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT 
src, uint16* RESTRICT clut); - static void WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut); - static void ReadCLUT_T32_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst); - static void ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst); - //static void ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64); - //static void ReadCLUT_T16_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst); - //static void ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst); - //static void ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64); - static void ExpandCLUT64_T32_I8(const uint32* RESTRICT src, uint64* RESTRICT dst); - static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst); - static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst); - //static void ExpandCLUT64_T16_I8(const uint32* RESTRICT src, uint64* RESTRICT dst); - static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst); - static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst); - - static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA); - -public: - GSClut(GSLocalMemory* mem); - virtual ~GSClut(); - - void Invalidate(); - bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - //void Read(const GIFRegTEX0& TEX0); - void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); - void GetAlphaMinMax32(int& amin, int& amax); - - uint32 operator [] (size_t i) const {return m_buff32[i];} - - operator const uint32*() const {return m_buff32;} - operator const uint64*() const {return 
m_buff64;} -}; diff --git a/plugins/GSdx_legacy/GSCodeBuffer.cpp b/plugins/GSdx_legacy/GSCodeBuffer.cpp deleted file mode 100644 index 9de0e189b3..0000000000 --- a/plugins/GSdx_legacy/GSCodeBuffer.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSCodeBuffer.h" - -GSCodeBuffer::GSCodeBuffer(size_t blocksize) - : m_blocksize(blocksize) - , m_pos(0) - , m_reserved(0) - , m_ptr(NULL) -{ -} - -GSCodeBuffer::~GSCodeBuffer() -{ - for(list::iterator i = m_buffers.begin(); i != m_buffers.end(); i++) - { - vmfree(*i, m_blocksize); - } -} - -void* GSCodeBuffer::GetBuffer(size_t size) -{ - ASSERT(size < m_blocksize); - ASSERT(m_reserved == 0); - - size = (size + 15) & ~15; - - if(m_ptr == NULL || m_pos + size > m_blocksize) - { - m_ptr = (uint8*)vmalloc(m_blocksize, true); - - m_pos = 0; - - m_buffers.push_back(m_ptr); - } - - uint8* ptr = &m_ptr[m_pos]; - - m_reserved = size; - - return ptr; -} - -void GSCodeBuffer::ReleaseBuffer(size_t size) -{ - ASSERT(size <= m_reserved); - - m_pos = ((m_pos + size) + 15) & ~15; - - ASSERT(m_pos < m_blocksize); - - m_reserved = 0; -} diff --git a/plugins/GSdx_legacy/GSCodeBuffer.h b/plugins/GSdx_legacy/GSCodeBuffer.h deleted file mode 100644 index 3345a4c7d5..0000000000 --- a/plugins/GSdx_legacy/GSCodeBuffer.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -class GSCodeBuffer -{ - list m_buffers; - size_t m_blocksize; - size_t m_pos, m_reserved; - uint8* m_ptr; - -public: - GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k - virtual ~GSCodeBuffer(); - - void* GetBuffer(size_t size); - void ReleaseBuffer(size_t size); -}; diff --git a/plugins/GSdx_legacy/GSCrc.cpp b/plugins/GSdx_legacy/GSCrc.cpp deleted file mode 100644 index b4ab18d286..0000000000 --- a/plugins/GSdx_legacy/GSCrc.cpp +++ /dev/null @@ -1,570 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSCrc.h" - -CRC::Game CRC::m_games[] = -{ - {0x00000000, NoTitle, NoRegion, 0}, - {0x2113EA2E, MetalSlug6, JP, 0}, - {0x42E05BAF, TomoyoAfter, JP, PointListPalette}, - {0x7800DC84, Clannad, JP, PointListPalette}, - {0xA6167B59, Lamune, JP, PointListPalette}, - {0xDDB59F46, KyuuketsuKitanMoonties, JP, PointListPalette}, - {0xC8EE2562, PiaCarroteYoukosoGPGakuenPrincess, JP, PointListPalette}, - {0x6CF94A43, KazokuKeikakuKokoroNoKizuna, JP, PointListPalette}, - {0xEDAF602D, DuelSaviorDestiny, JP, PointListPalette}, - {0xA39517AB, FFX, EU, 0}, - {0xA39517AE, FFX, FR, 0}, - {0x941BB7D9, FFX, DE, 0}, - {0xA39517A9, FFX, IT, 0}, - {0x941BB7DE, FFX, ES, 0}, - {0xA80F497C, FFX, ES, 0}, - {0xB4414EA1, FFX, RU, 0}, - {0xEE97DB5B, FFX, RU, 0}, - {0xAEC495CC, FFX, RU, 0}, - {0xBB3D833A, FFX, US, 0}, - {0x6A4EFE60, FFX, JP, 0}, - {0x3866CA7E, FFX, ASIA, 0}, // int. - {0x658597E2, FFX, JP, 0}, // int. - {0x9AAC5309, FFX2, EU, 0}, - {0x9AAC530C, FFX2, FR, 0}, - {0x9AAC530A, FFX2, ES, 0}, - {0x9AAC530D, FFX2, DE, 0}, - {0x9AAC530B, FFX2, IT, 0}, - {0x48FE0C71, FFX2, US, 0}, - {0x8A6D7F14, FFX2, JP, 0}, - {0xE1FD9A2D, FFX2, JP, 0}, // int. - {0x11624CD6, FFX2, KO, 0}, - {0x78DA0252, FFXII, EU, 0}, - {0xC1274668, FFXII, EU, 0}, - {0xDC2A467E, FFXII, EU, 0}, - {0xCA284668, FFXII, EU, 0}, - {0xC52B466E, FFXII, EU, 0}, //ES - {0xE5E71BF9, FFXII, FR, 0}, - {0x280AD120, FFXII, JP, 0}, - {0x08C1ED4D, HauntingGround, EU, 0}, - {0x2CD5794C, HauntingGround, EU, 0}, - // {0x7D4EA48F, HauntingGround, EU, 0}, // same CRC as {Genji, EU} - {0x867BB945, HauntingGround, JP, 0}, - {0xE263BC4B, HauntingGround, JP, 0}, - {0x901AAC09, HauntingGround, US, 0}, - {0x21068223, Okami, US, 0}, - {0x891F223F, Okami, EU, 0}, // PAL DE, ES & FR. 
- {0xC5DEFEA0, Okami, JP, 0}, - {0x086273D2, MetalGearSolid3, EU, 0}, // - PAL UK & FR - {0x26A6E286, MetalGearSolid3, DE, 0}, - {0x9F185CE1, MetalGearSolid3, EU, 0}, - {0x98D4BC93, MetalGearSolid3, ES, 0}, - {0x79ED26AD, MetalGearSolid3, EU, 0}, - {0x5E31EA42, MetalGearSolid3, EU, 0}, - {0xD7ED797D, MetalGearSolid3, DE, 0}, - {0x053D2239, MetalGearSolid3, US, 0}, //Metal Gear Solid 3 Subsistence disc1 - {0x01B2FA7F, MetalGearSolid3, US, 0}, //Metal Gear Solid 3 Subsistence disc2 - {0xAA31B5BF, MetalGearSolid3, US, 0}, - {0x86BC3040, MetalGearSolid3, US, 0}, //Metal Gear Solid 3 Subsistence disc1 - {0x0481AD8A, MetalGearSolid3, JP, 0}, - {0xC69ACB6F, MetalGearSolid3, KO, 0}, //Metal Gear Solid 3 Snake Eater - {0xB0D195EF, MetalGearSolid3, KO, 0}, //Metal Gear Solid 3 Subsistence disc1 - {0x3EBABC9C, MetalGearSolid3, KO, 0}, //Metal Gear Solid 3 Subsistence disc2 - {0x8A5C25A7, MetalGearSolid3, ES, 0}, //Metal Gear Solid 3 Subsistence Spanish version - {0x278722BF, DBZBT2, US, 0}, - {0xFE961D28, DBZBT2, US, 0}, - {0x0393B6BE, DBZBT2, EU, 0}, - {0xE2F289ED, DBZBT2, JP, 0}, // Sparking Neo! 
- {0xE29C09A3, DBZBT2, KO, 0}, //DragonBall Z Sparking Neo - {0x0BAA4387, DBZBT2, JP, 0}, - {0x35AA84D1, DBZBT2, NoRegion, 0}, - {0xBE6A9CFB, DBZBT2, NoRegion, 0}, - {0x428113C2, DBZBT3, US, 0}, - {0xA422BB13, DBZBT3, EU, 0}, - {0xCE93CB30, DBZBT3, JP, 0}, - {0xF28D21F1, DBZBT3, JP, 0}, - {0x983C53D2, DBZBT3, NoRegion, 0}, - {0x983C53D3, DBZBT3, EU, 0}, - {0x9B0E119F, DBZBT3, KO, 0}, //DragonBall Z Sparking Meteo - {0x72B3802A, SFEX3, US, 0}, - {0x71521863, SFEX3, US, 0}, - {0x28703748, Bully, US, 0}, - {0x019CFA48, Bully, JP, 0}, - {0xC78A495D, BullyCC, US, 0}, - {0xC19A374E, SoTC, US, 0}, - {0x7D8F539A, SoTC, EU, 0}, - {0x0F0C4A9C, SoTC, EU, 0}, - {0x877F3436, SoTC, JP, 0}, - {0xA17D6AAA, SoTC, KO, 0}, - {0x877B3D35, SoTC, CH, 0}, - {0x3122B508, OnePieceGrandAdventure, US, 0}, - {0x8DF14A24, OnePieceGrandAdventure, EU, 0}, - {0xE446C9F9, OnePieceGrandAdventure, KO, 0}, - {0xCA2073B3, OnePieceGrandBattle, KO, 0}, - {0x66953267, OnePieceGrandAdventure, JP, 0}, - {0xE1674F57, OnePieceGrandBattle, EU, 0}, - {0x947B933B, OnePieceGrandAdventure, US, 0}, - {0xB049DD5E, OnePieceGrandBattle, US, 0}, - {0x5D02CC5B, OnePieceGrandBattle, NoRegion, 0}, - {0x6F8545DB, ICO, US, 0}, - {0xB01A4C95, ICO, JP, 0}, - {0x2DF2C1EA, ICO, KO, 0}, - {0x5C991F4E, ICO, EU, 0}, - {0x7ACF7E03, ICO, NoRegion, 0}, // same CRC as {SpyroNewBeginning, NoRegion} - // and as "Twisted Metal - Black" (PAL). 
- {0x788D8B4F, ICO, EU, 0}, - {0x29C28734, ICO, CH, 0}, - {0xAEAD1CA3, GT4, JP, 0}, - {0x30E41D93, GT4, KO, 0}, - {0x44A61C8F, GT4, EU, 0}, - {0x0086E35B, GT4, EU, 0}, - {0x77E61C8A, GT4, US, 0}, - {0x33C6E35E, GT4, US, 0}, - {0x7ABDBB5E, GT3, CH, 0}, // cutie comment - {0x3E9D448A, GT3, CH, 0}, // cutie comment - {0xAD66643C, GT3, CH, 0}, // cutie comment - {0x6810C3BC, GT3, CH, 0}, //GRAN TURISMO Concept 2002 Tokyo-Geneva - {0x85AE91B3, GT3, US, 0}, - {0xC220951A, GT3, JP, 0}, - {0x9DE5CF65, GT3, JP, 0}, //Gran Turismo 3: A-spec - {0x60013EBD, GTConcept, EU, 0}, - {0xB590CE04, GTConcept, EU, 0}, - {0x0EEF32A3, GTConcept, KO, 0}, //Gran Turismo Concept 2002 Tokyo-Seoul - {0xC164550A, WildArms5, JPUNDUB, 0}, - {0xC1640D2C, WildArms5, US, 0}, - {0x0FCF8FE4, WildArms5, EU, 0}, - {0x2294D322, WildArms5, JP, 0}, - {0x565B6170, WildArms5, JP, 0}, - {0xBBC3EFFA, WildArms4, US, 0}, - {0xBBC396EC, WildArms4, US, 0}, //hmm such a small diff in the CRC.. - {0x7B2DE9CC, WildArms4, EU, 0}, - {0x8B029334, Manhunt2, EU, 0}, - {0x3B0ADBEF, Manhunt2, US, 0}, - {0x09F49E37, CrashBandicootWoC, NoRegion, 0}, - {0x103B5706, CrashBandicootWoC, US, 0}, //American Greatest Hits release - {0x75182BE5, CrashBandicootWoC, US, 0}, - {0x5188ABCA, CrashBandicootWoC, US, 0}, - {0x3A03D62F, CrashBandicootWoC, EU, 0}, - {0x013E349D, ResidentEvil4, US, 0}, - {0xDBB7A559, ResidentEvil4, US, 0}, - {0x6BA2F6B9, ResidentEvil4, EU, 0}, - {0x60FA8C69, ResidentEvil4, JP, 0}, - {0x5F254B7C, ResidentEvil4, KO, 0}, - {0x72E1E60E, Spartan, EU, 0}, - {0x26689C87, Spartan, JP, 0}, - {0x08277A9E, Spartan, US, 0}, - {0xA32F7CD0, AceCombat4, US, 0}, - {0x5ED8FB53, AceCombat4, JP, 0}, - {0x1B9B7563, AceCombat4, EU, 0}, - {0xFC46EA61, Tekken5, JP, 0}, - {0x1F88EE37, Tekken5, EU, 0}, - {0x1F88BECD, Tekken5, EU, 0}, //language selector... 
- {0x652050D2, Tekken5, US, 0}, - {0xEA64EF39, Tekken5, KO, 0}, - {0x9E98B8AE, IkkiTousen, JP, 0}, - {0xD6385328, GodOfWar, US, 0}, - {0xF2A8D307, GodOfWar, US, 0}, - {0xFB0E6D72, GodOfWar, EU, 0}, - {0xEB001875, GodOfWar, EU, 0}, - {0xCF148C74, GodOfWar, EU, 0}, - {0xCA052D22, GodOfWar, JP, 0}, - {0xBFCC1795, GodOfWar, KO, 0}, - {0x9567B7D6, GodOfWar, KO, 0}, - {0x9B5C97BA, GodOfWar, KO, 0}, - {0xA61A4C6D, GodOfWar, US, 0}, - {0xE23D532B, GodOfWar, NoRegion, 0}, - {0xDF1AF973, GodOfWar, EU, 0}, - {0x1A85E924, GodOfWar, NoRegion, 0}, // cutie comment - {0x608ACBD3, GodOfWar, CH, 0}, // cutie comment - {0x2F123FD8, GodOfWar2, US, 0}, // same CRC as RU - {0x44A8A22A, GodOfWar2, EU, 0}, - {0x60BC362B, GodOfWar2, EU, 0}, - {0x4340C7C6, GodOfWar2, KO, 0}, - {0xE96E55BD, GodOfWar2, JP, 0}, - {0xF8CD3DF6, GodOfWar2, NoRegion, 0}, - {0x0B82BFF7, GodOfWar2, NoRegion, 0}, - {0x5990866F, GodOfWar2, NoRegion, 0}, - {0xC4C4FD5F, GodOfWar2, CH, 0}, - {0xDCD9A9F7, GodOfWar2, EU, 0}, - {0xFA0DF523, GodOfWar2, CH, 0}, // cutie comment - {0x9FEE3466, GodOfWar2, CH, 0}, // cutie comment - {0x5D482F18, JackieChanAdv, EU, 0}, - {0xF0A6D880, HarvestMoon, US, 0}, - {0x9536E111, NamcoXCapcom, JP, 0}, - {0x75C01A04, NamcoXCapcom, US, 0}, // same CRC as another JP disc - {0x95CC86EF, GiTS, US, 0}, // same CRC also reported as EU - {0xA5768F53, GiTS, JP, 0}, - {0xA3643EB1, GiTS, KO, 0}, - {0xBF6F101F, GiTS, EU, 0}, // same CRC as another US disc - {0x6BF11378, Onimusha3, US, 0}, - {0x71320CA8, Onimusha3, JP, 0}, - {0xDAFFFB0D, Onimusha3, KO, 0}, - {0xF442260C, MajokkoALaMode2, JP, 0}, - {0x14FE77F7, TalesOfAbyss, US, 0}, - {0x045D77E9, TalesOfAbyss, JPUNDUB, 0}, - {0xAA5EC3A3, TalesOfAbyss, JP, 0}, - {0xFB236A46, SonicUnleashed, US, 0}, - {0x8C913264, SonicUnleashed, EU, 0}, - {0x5C1EBD61, SimpsonsGame, EU, 0}, - {0x5C1EBF61, SimpsonsGame, FR, 0}, - {0x4C7BB3C8, SimpsonsGame, NoRegion, 0}, - {0x4C94B32C, SimpsonsGame, NoRegion, 0}, - {0x565B7E04, SimpsonsGame, IT, 0}, - {0x206779D8, 
SimpsonsGame, EU, 0}, - {0xBBE4D862, SimpsonsGame, US, 0}, - {0xD71B57F4, Genji, US, 0}, - {0xFADEBC45, Genji, EU, 0}, - {0xB4776FC1, Genji, JP, 0}, - {0x56242EC9, Genji, KO, 0}, - {0xCDAF243D, Genji, CH, 0}, - {0x2A5E0B61, Genji, CH, 0}, - {0x7D4EA48F, Genji, EU, 0}, // same CRC as {HauntingGround, EU} - {0xE04EA200, StarOcean3, EU, 0}, - {0x23A97857, StarOcean3, US, 0}, - {0xBEC32D49, StarOcean3, JP, 0}, - {0x8192A241, StarOcean3, JP, 0}, //NTSC JP special directors cut limited extra sugar on top edition (the special one :p) - // it's the US version with speach files from JP... {0x23A97857, StarOcean3, JPUNDUB, 0}, - {0xCC96CE93, ValkyrieProfile2, US, 0}, - {0x774DE8E2, ValkyrieProfile2, JP, 0}, - {0x04CCB600, ValkyrieProfile2, EU, 0}, - {0xB65E141B, ValkyrieProfile2, DE, 0}, // PAL German - {0xC70FC973, ValkyrieProfile2, IT, 0}, - {0x47B9B2FD, RadiataStories, US, 0}, - {0xAC73005E, RadiataStories, JP, 0}, - {0xE8FCF8EC, SMTNocturne, US, ZWriteMustNotClear}, // saves/reloads z buffer around shadow drawing, same issue with all the SMT games following - {0xF0A31EE3, SMTNocturne, EU, ZWriteMustNotClear}, // SMTNocturne (Lucifers Call in EU) - {0xAE0DE7B7, SMTNocturne, EU, ZWriteMustNotClear}, // SMTNocturne (Lucifers Call in EU) - {0xD60DA6D4, SMTNocturne, JP, ZWriteMustNotClear}, // SMTNocturne - {0x0E762E8D, SMTNocturne, JP, ZWriteMustNotClear}, // SMTNocturne Maniacs - {0x47BA9034, SMTNocturne, JP, ZWriteMustNotClear}, // SMTNocturne Maniacs Chronicle - {0xD3FFC263, SMTNocturne, KO, ZWriteMustNotClear}, - {0xD7273511, SMTDDS1, US, ZWriteMustNotClear}, // SMT Digital Devil Saga - {0x1683A6BE, SMTDDS1, EU, ZWriteMustNotClear}, // SMT Digital Devil Saga - {0x44865CE1, SMTDDS1, JP, ZWriteMustNotClear}, // SMT Digital Devil Saga - {0xF2E397C0, SMTDDS1, KO, ZWriteMustNotClear}, // SMT Digital Devil Saga - {0x43202D1A, SMTDDS2, KO, ZWriteMustNotClear}, // SMT Digital Devil Saga 2 - {0xD382C164, SMTDDS2, US, ZWriteMustNotClear}, // SMT Digital Devil Saga 2 - {0xD568B684, 
SMTDDS2, EU, ZWriteMustNotClear}, // SMT Digital Devil Saga 2 - {0xE47C1A9C, SMTDDS2, JP, ZWriteMustNotClear}, // SMT Digital Devil Saga 2 - {0x0B8AB37B, RozenMaidenGebetGarden, JP, 0}, - {0x1CC39DBD, SuikodenTactics, US, 0}, - {0x3E205556, SuikodenTactics, EU, 0}, - {0xB808413B, SuikodenTactics, JP, 0}, - {0x64C58FB4, TenchuFS, US, 0}, - {0xE7CCCB1E, TenchuFS, EU, 0}, - {0x1969B19A, TenchuFS, ES, 0}, //PAL Spanish - {0xBF0DC4CE, TenchuFS, DE, 0}, - {0x696BBEC3, TenchuFS, KO, 0}, - {0x525C1994, TenchuFS, ASIA, 0}, - {0x0D73BBCD, TenchuFS, KO, 0}, - {0xAFBFB287, TenchuWoH, KO, 0}, - {0x767E383D, TenchuWoH, US, 0}, - {0x83261085, TenchuWoH, DE, 0}, //PAL German - {0x7FA1510D, TenchuWoH, EU, 0}, //PAL ES, IT - {0xC8DADF58, TenchuWoH, EU, 0}, - {0x13DD9957, TenchuWoH, JP, 0}, - {0x8BC95883, Sly3, US, 0}, - {0x8164C614, Sly3, EU, 0}, - {0xA8CC1583, Sly3, KO, 0}, - {0x518DD841, Sly2, KO, 0}, - {0x07652DD9, Sly2, US, 0}, - {0xFDA1CBF6, Sly2, EU, 0}, - {0x15DD1F6F, Sly2, NoRegion, 0}, - {0xA9C82AB9, DemonStone, US, 0}, - {0x7C7578F3, DemonStone, EU, 0}, - {0x22425C19, DemonStone, KO, 0}, - {0x506644B3, BigMuthaTruckers, EU, 0}, - {0x90F0D852, BigMuthaTruckers, US, 0}, - {0x5CC9BF81, TimeSplitters2, EU, 0}, - {0x12532F1C, TimeSplitters2, US, 0}, - {0xC818BEC2, LordOfTheRingsTwoTowers, US, 0}, - {0xDC43F2B8, LordOfTheRingsTwoTowers, EU, 0}, - {0x9ABF90FB, LordOfTheRingsTwoTowers, ES, 0}, - {0x5FF407EE, LordOfTheRingsTwoTowers, IT, 0}, - {0xC0E909E9, LordOfTheRingsTwoTowers, JP, 0}, - {0x6898435D, LordOfTheRingsTwoTowers, KO, 0}, - {0xDC2F9B98, LordOfTheRingsTwoTowers, CH, 0}, // cutie comment - {0xEB198738, LordOfTheRingsThirdAge, US, 0}, - {0x614F4CF4, LordOfTheRingsThirdAge, EU, 0}, - {0x37CD4279, LordOfTheRingsThirdAge, KO, 0}, - {0xE169BAF8, RedDeadRevolver, US, 0}, - {0xE2E67E23, RedDeadRevolver, EU, 0}, - {0xEDDD6573, SpidermanWoS, US, 0}, //Web of Shadows - {0xF14C1D82, SpidermanWoS, EU, 0}, - {0xF56C7948, HeavyMetalThunder, JP, 0}, - {0x2498951B, SilentHill3, US, 0}, 
- {0x5088CCDB, SilentHill3, EU, 0}, - {0x8CFE667F, SilentHill3, JP, 0}, - {0xC6CBDE91, SilentHill3, KO, 0}, - {0x6B149273, SilentHill2, EU, 0}, - {0x6BBD4932, SilentHill2, EU, 0}, // Director's Cut - {0x8E8E384B, SilentHill2, US, 0}, - {0xFE06A030, SilentHill2, US, 0}, //greatest hits - {0xE36E16C9, SilentHill2, JP, 0}, - {0x380D6782, SilentHill2, JP, 0}, //Saigo no uta - {0x6DF62AEA, BleachBladeBattlers, JP, 0}, - {0x6EB71AB0, BleachBladeBattlers, JP, 0}, //2nd - {0x3A446111, CastlevaniaCoD, US, 0}, - {0xF321BC38, CastlevaniaCoD, EU, 0}, - {0x950876FA, CastlevaniaCoD, KO, 0}, - {0x237B84D3, CastlevaniaCoD, CH, 0}, - {0x28270F7D, CastlevaniaLoI, US, 0}, - {0x306CDADA, CastlevaniaLoI, EU, 0}, - {0xA36CFF6C, CastlevaniaLoI, JP, 0}, - {0x9A93FE5D, CastlevaniaLoI, KO, 0}, - {0xA79B0491, NanoBreaker, JP, 0}, - {0x7985D894, FinalFightStreetwise, US, 0}, - {0xED4BF0D3, FinalFightStreetwise, US, 0}, // cutie comment - {0x73C560BA, FinalFightStreetwise, EU, 0}, - {0xCBB87BF9, EvangelionJo, JP, 0}, // cutie comment - {0x278A91FD, CaptainTsubasa, JP, 0}, // cutie comment - {0xC5B75C7C, Oneechanbara2Special, JP, 0}, // cutie comment - {0xC0659AD1, NarutimateAccel, JP, 0}, // cutie comment - {0xF3D9DFBE, NarutimateAccel, JP, 0}, - {0x59739DDE, Naruto, JP, 0}, // cutie comment - {0xF7786EE4, EternalPoison, JP, 0}, // cutie comment - {0x2BE55519, EternalPoison, US, 0}, - {0xE01F57EC, LegoBatman, US, 0}, // cutie comment - {0xE01F57ED, LegoBatman, EU, 0}, - {0xE0347841, XE3, JP, 0}, // cutie comment - {0xA4E88698, XE3, CH, 0}, - {0x2088950A, XE3, US, 0}, - // DMC(1)? 
{0x79B8A95F, DevilMayCry3, US, 0}, - {0x7F3D692D, DevilMayCry3, CH, 0}, - // {0x1A85E924, DevilMayCry3, CH, 0}, // same CRC as {GodOfWar, NoRegion} - {0xB1995E29, ShadowofRome, EU, 0}, // cutie comment - {0x958DCA28, ShadowofRome, EU, 0}, - {0x57818AF6, ShadowofRome, US, 0}, - {0xF21EE6E0, CrashNburn, US, 0}, - {0x694A998E, TombRaiderUnderworld, JP, 0}, // cutie comment - {0x8E214549, TombRaiderUnderworld, EU, 0}, - {0xB639EB17, TombRaiderAnniversary, US, 0}, - {0xB05805B6, TombRaiderAnniversary, JP, 0}, // cutie comment - {0xA629A376, TombRaiderAnniversary, EU, 0}, - {0xBC8B3F50, TombRaiderLegend, US, 0}, // cutie comment - {0x05177ECE, TombRaiderLegend, EU, 0}, - {0x08FFF00D, SSX3, JP, 0}, // cutie comment - {0xCE942B2A, SSX3, EU, 0}, - {0x5C891FF1, Black, US, 0}, - {0xCAA04879, Black, EU, 0}, - {0xADDFF505, Black, EU, 0}, //? - {0xB3A9F9ED, Black, JP, 0}, - {0x7838882F, VF4, JP, 0}, - {0xEA131B57, VF4, US, 0}, - {0x4F755D39, TyTasmanianTiger, US, 0}, - {0xD59D3252, TyTasmanianTiger, EU, 0}, - {0x5A1BB2A1, TyTasmanianTiger2, US, 0}, - {0x44A5FA15, FFVIIDoC, US, 0}, - {0x33F7D21A, FFVIIDoC, EU, 0}, - {0xAFAC88EF, FFVIIDoC, JP, 0}, - {0x568A5C78, DigimonRumbleArena2, US, 0}, - {0x785E22BB, DigimonRumbleArena2, EU, 0}, - {0x4C5CE4C3, DigimonRumbleArena2, EU, 0}, - {0x7F995E8D, DigimonRumbleArena2, JP, 0}, - {0x115A184D, DigimonRumbleArena2, KO, 0}, - {0x879CDA5E, StarWarsForceUnleashed, US, 0}, - {0x137C792E, StarWarsForceUnleashed, US, 0}, - {0x503BF9E1, StarWarsBattlefront, NoRegion, 0}, // EU and US versions have the same CRC - {0x02F4B541, StarWarsBattlefront2, NoRegion, 0}, // EU and US versions have the same CRC - {0xA8DB29DF, BlackHawkDown, EU, 0}, - {0x25FC361B, DevilMayCry3, US, 0}, //SE - {0x2F7D8AD5, DevilMayCry3, US, 0}, - {0x0BED0AF9, DevilMayCry3, US, 0}, - {0x18C9343F, DevilMayCry3, EU, 0}, //SE - {0x7ADCB24A, DevilMayCry3, EU, 0}, - {0x79C952B0, DevilMayCry3, JP, 0}, //SE - {0x7F3DDEAB, DevilMayCry3, JP, 0}, - {0x05931990, DevilMayCry3, KO, 0}, - 
{0x4AD36D59, DevilMayCry3, RU, 0}, - {0xBEBF8793, BurnoutTakedown, US, 0}, - {0x75BECC18, BurnoutTakedown, EU, 0}, - {0xCE49B0DE, BurnoutTakedown, EU, 0}, - {0xD224D348, BurnoutRevenge, US, 0}, - {0x7E83CC5B, BurnoutRevenge, EU, 0}, - {0xEEA60511, BurnoutRevenge, KO, 0}, - {0x8C9576A1, BurnoutDominator, US, 0}, - {0x8C9576B4, BurnoutDominator, EU, 0}, - {0x4A0E5B3A, MidnightClub3, US, 0}, //dub - {0xEBE1972D, MidnightClub3, EU, 0}, //dub - {0x60A42FF5, MidnightClub3, US, 0}, //remix - {0x4B1A0FFA, XmenOriginsWolverine, US, 0}, - {0xBFF3DBCB, CallofDutyFinalFronts, US, 0}, - {0xB78A5F5A, CallofDutyFinalFronts, EU, 0}, - {0xD03D4C77, SpyroNewBeginning, US, 0}, - {0x0EE5646B, SpyroNewBeginning, EU, 0}, - // {0x7ACF7E03, SpyroNewBeginning, NoRegion, 0}, // same CRC as {ICO, NoRegion} - // and as "Twisted Metal - Black" (PAL). - {0xB80CE8EC, SpyroEternalNight, US, 0}, - {0x8AE9536D, SpyroEternalNight, EU, 0}, - {0xC95F0198, SpyroEternalNight, NoRegion, 0}, - {0x43AB7214, TalesOfLegendia, US, 0}, - {0x1F8640E0, TalesOfLegendia, JP, 0}, - {0xE4F5DA2B, TalesOfLegendia, KO, 0}, - {0x98C7B76D, NanoBreaker, US, 0}, - {0x7098BE76, NanoBreaker, KO, 0}, - {0x9B89F425, NanoBreaker, EU, 0}, - {0x519E816B, Kunoichi, US, 0}, //Nightshade - {0x3FB419FD, Kunoichi, JP, 0}, - {0x086D198E, Kunoichi, CH, 0}, - {0x3B470BBD, Kunoichi, EU, 0}, - {0x6BA65DD8, Kunoichi, KO, 0}, - {0XD3F182A3, Yakuza, EU, 0}, - {0x6F9F99F8, Yakuza, EU, 0}, - {0x388F687B, Yakuza, US, 0}, - {0xB7B3800A, Yakuza, JP, 0}, - {0xA60C2E65, Yakuza2, EU, 0}, - {0x800E3E5A, Yakuza2, EU, 0}, - {0x97E9C87E, Yakuza2, US, 0}, - {0xC6B95C48, Yakuza2, JP, 0}, - {0x9000252A, SkyGunner, JP, 0}, - {0x93092623, SkyGunner, JP, 0}, - {0xA9461CB2, SkyGunner, US, 0}, - {0xB799A60C, SkyGunner, NoRegion, 0}, - {0x6848699B, JamesBondEverythingOrNothing, US, 0}, - {0x5FFFDE40, JamesBondEverythingOrNothing, EU, 0}, - {0xF7FB054C, Siren, CH, 0}, // cutie comment - {0x47C2C34A, Siren, KO, 0}, - {0xB083CCC2, Siren, EU, 0}, // Spanish - 
{0x90F4B057, ZettaiZetsumeiToshi2, CH, 0}, - {0xC988ECBB, ZettaiZetsumeiToshi2, JP, 0}, - {0x2905C5C6, ZettaiZetsumeiToshi2, US, 0}, // Raw Danger! - {0x81CA29BE, VF4EVO, EU, 0}, - {0xC9DEF513, VF4EVO, US, 0}, - {0x7B402694, VF4EVO, KO, 0}, - {0xAB01411F, VF4EVO, JP, 0}, - {0xE11DFA28, Dororo, CH, 0}, - {0x89954774, Dororo, US, 0}, - {0xFDA2F2DF, Dororo, KO, 0}, - {0xBD17248E, ShinOnimusha, JP, 0}, - {0xBE17248E, ShinOnimusha, JP, 0}, - {0xB817248E, ShinOnimusha, JP, 0}, - {0x812C5A96, ShinOnimusha, EU, 0}, - {0xFE44479E, ShinOnimusha, US, 0}, - {0xFFDE85E9, ShinOnimusha, US, 0}, - {0xE21404E2, GetaWay, US, 0}, - {0xE78971DF, GetaWayBlackMonday, US, 0}, - {0x1130BF23, SakuraTaisen, CH, 0}, // cutie comment - {0x4FAE8B83, SakuraTaisen, KO, 0}, - {0xEF06DBD6, SakuraWarsSoLongMyLove, JP, 0}, // cutie comment - {0xDD41054D, SakuraWarsSoLongMyLove, US, 0}, // cutie comment - {0xC2E3A7A4, SakuraWarsSoLongMyLove, KO, 0}, - {0x4A4B623A, FightingBeautyWulong, JP,0}, // cutie comment - {0x5AC7E79C, TouristTrophy, CH, 0}, // cutie comment - {0xFF9C0E93, TouristTrophy, US, 0}, - {0xCA9AA903, TouristTrophy, EU, 0}, //crc hack not fully working on PAL, still needs brightness =0 - {0xA1B3F232, GTASanAndreas, EU, 0}, // cutie comment - {0x399A49CA, GTASanAndreas, US, 0}, - {0x60FE139C, GTASanAndreas, JP, 0}, - {0x2615F542, FrontMission5, JP, 0}, - {0xF60255AC, FrontMission5, JP, 0}, - {0xCB783836, FrontMission5, JP, 0}, - {0xAEDAEE99, GodHand, JP, 0}, - {0x6FB69282, GodHand, US, 0}, - {0x924C4AA6, GodHand, KO, 0}, - {0x9637D496, KnightsOfTheTemple2, JP, 0}, // cutie comment - {0x4E811100, UltramanFightingEvolution, JP, 0}, // cutie comment - {0xF7F181C3, DeathByDegreesTekkenNinaWilliams, CH, 0}, // cutie comment - {0xF088FA5B, DeathByDegreesTekkenNinaWilliams, KO, 0}, - {0x59683BB0, DeathByDegreesTekkenNinaWilliams, EU, 0}, - {0x771C3B47, AlpineRacer3, JP, 0}, // cutie comment - {0x7367D841, AlpineRacer3, EU, 0}, - {0x449E1F6B, HummerBadlands, US, 0}, - {0xAEA1B3AD, SengokuBasara, 
JP, 0}, - {0x5B659BED, Grandia3, JP, 0}, - {0x5B657DAD, Grandia3, US, 0}, - {0x830B6FB1, TalesofSymphonia, JP, 0}, - {0x8409FD51, TalesofDestiny, JP, 0}, // cutie comment - {0xA90CD846, TalesofDestiny, JP, 0}, - {0xC4D0FACC, SDGundamGGeneration, JP, 0}, // cutie comment - {0xBBDE6926, SDGundamGGeneration, JP, 0}, // cutie comment - {0x49D60A00, SDGundamGGeneration, JP, 0}, //NEO - {0x83AFB38A, SoulCalibur2, KO, 0}, - {0xE1B01308, SoulCalibur2, US, 0}, - {0xFB8554A0, SoulCalibur3, JP, 0}, - {0x027C604C, SoulCalibur3, US, 0}, - {0x24090A12, SoulCalibur3, EU, 0}, - {0x37B99B14, SoulCalibur3, KO, 0}, - {0xBC5480A3, SoulCalibur3, EU, 0}, - {0xFC0F8A5B, Simple2000Vol114, JP, 0}, - {0x0098F740, SeintoSeiya, NoRegion, 0}, // cutie comment - {0xBDD9BAAD, UrbanReign, US, 0}, // cutie comment - {0xAE4BEBD3, UrbanReign, EU, 0}, - {0x48AC09BC, SteambotChronicles, EU, 0}, - {0x9F391882, SteambotChronicles, US, 0}, - {0xFEFCF9DE, SteambotChronicles, JP, 0}, // Ponkotsu Roman Daikatsugeki: Bumpy Trot - {0XE1BF5DCA, SuperManReturns, US, 0}, - {0x06A7506A, SacredBlaze, JP, 0}, -}; - -hash_map CRC::m_map; - -string ToLower( string str ) -{ - transform( str.begin(), str.end(), str.begin(), ::tolower); - return str; -} - -// The exclusions list is a comma separated list of: the word "all" and/or CRCs in standard hex notation (0x and 8 digits with leading 0's if required). -// The list is case insensitive and order insensitive. -// E.g. Disable all CRC hacks: CrcHacksExclusions=all -// E.g. 
Disable hacks for these CRCs: CrcHacksExclusions=0x0F0C4A9C, 0x0EE5646B, 0x7ACF7E03 -bool IsCrcExcluded(string exclusionList, uint32 crc) -{ - string target = format( "0x%08x", crc ); - exclusionList = ToLower( exclusionList ); - return ( exclusionList.find( target ) != string::npos || exclusionList.find( "all" ) != string::npos ); -} - -CRC::Game CRC::Lookup(uint32 crc) -{ - if(m_map.empty()) - { - string exclusions = theApp.GetConfig( "CrcHacksExclusions", "" ); - if (exclusions.length() != 0) - printf( "GSdx: CrcHacksExclusions: %s\n", exclusions.c_str() ); - - int crcDups = 0; - for(size_t i = 0; i < countof(m_games); i++) - { - if( !IsCrcExcluded( exclusions, m_games[i].crc ) ){ - if(m_map[m_games[i].crc]){ - printf("[FIXME] GSdx: Duplicate CRC: 0x%x: (game-id/region-id) %d/%d overrides %d/%d\n" - , m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region); - crcDups++; - } - - m_map[m_games[i].crc] = &m_games[i]; - } - //else - // printf( "GSdx: excluding CRC hack for 0x%08x\n", m_games[i].crc ); - } - if(crcDups) - printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups); - } - - hash_map::iterator i = m_map.find(crc); - - if(i != m_map.end()) - { - return *i->second; - } - - return m_games[0]; -} diff --git a/plugins/GSdx_legacy/GSCrc.h b/plugins/GSdx_legacy/GSCrc.h deleted file mode 100644 index c47f8ec6f2..0000000000 --- a/plugins/GSdx_legacy/GSCrc.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -class CRC -{ -public: - enum Title - { - NoTitle, - MetalSlug6, - TomoyoAfter, - Clannad, - Lamune, - KyuuketsuKitanMoonties, - PiaCarroteYoukosoGPGakuenPrincess, - KazokuKeikakuKokoroNoKizuna, - DuelSaviorDestiny, - FFX, - FFX2, - FFXII, - HauntingGround, - ShadowHearts, - Okami, - MetalGearSolid3, - DBZBT2, - DBZBT3, - SFEX3, - Bully, - BullyCC, - SoTC, - OnePieceGrandAdventure, - OnePieceGrandBattle, - ICO, - GT4, - GT3, - GTConcept, - WildArms5, - WildArms4, - Manhunt2, - CrashBandicootWoC, - ResidentEvil4, - Spartan, - AceCombat4, - Tekken5, - IkkiTousen, - GodOfWar, - GodOfWar2, - JackieChanAdv, - HarvestMoon, - NamcoXCapcom, - GiTS, - Onimusha3, - MajokkoALaMode2, - TalesOfAbyss, - SonicUnleashed, - SimpsonsGame, - Genji, - StarOcean3, - ValkyrieProfile2, - RadiataStories, - SMTNocturne, - SMTDDS1, - SMTDDS2, - RozenMaidenGebetGarden, - EvangelionJo, - SuikodenTactics, - CaptainTsubasa, - Oneechanbara2Special, - NarutimateAccel, - Naruto, - EternalPoison, - LegoBatman, - XE3, - TenchuWoH, - TenchuFS, - Sly3, - Sly2, - ShadowofRome, - DemonStone, - BigMuthaTruckers, - TimeSplitters2, - LordOfTheRingsTwoTowers, - LordOfTheRingsThirdAge, - RedDeadRevolver, - SpidermanWoS, - HeavyMetalThunder, - SilentHill3, - SilentHill2, - BleachBladeBattlers, - CastlevaniaCoD, - CastlevaniaLoI, - FinalFightStreetwise, - CrashNburn, - TombRaiderUnderworld, - TombRaiderAnniversary, - TombRaiderLegend, - SSX3, - Black, - VF4, - TyTasmanianTiger, - TyTasmanianTiger2, - FFVIIDoC, - DigimonRumbleArena2, - StarWarsForceUnleashed, - StarWarsBattlefront, - StarWarsBattlefront2, - BlackHawkDown, - DevilMayCry3, - 
BurnoutTakedown, - BurnoutRevenge, - BurnoutDominator, - MidnightClub3, - XmenOriginsWolverine, - CallofDutyFinalFronts, - SpyroNewBeginning, - SpyroEternalNight, - TalesOfLegendia, - NanoBreaker, - Kunoichi, - Yakuza, - Yakuza2, - SkyGunner, - JamesBondEverythingOrNothing, - Siren, - ZettaiZetsumeiToshi2, - VF4EVO, - Dororo, - ShinOnimusha, - GetaWay, - GetaWayBlackMonday, - SakuraTaisen, - SakuraWarsSoLongMyLove, - FightingBeautyWulong, - TouristTrophy, - GTASanAndreas, - FrontMission5, - GodHand, - KnightsOfTheTemple2, - UltramanFightingEvolution, - DeathByDegreesTekkenNinaWilliams, - AlpineRacer3, - HummerBadlands, - SengokuBasara, - Grandia3, - TalesofSymphonia, - TalesofDestiny, - SDGundamGGeneration, - SoulCalibur2, - SoulCalibur3, - Simple2000Vol114, - SeintoSeiya, - UrbanReign, - SteambotChronicles, - SacredBlaze, - SuperManReturns, - TitleCount, - }; - - enum Region - { - NoRegion, - US, - EU, - JP, - JPUNDUB, - RU, - FR, - DE, - IT, - ES, - CH, - ASIA, - KO, - RegionCount, - }; - - enum Flags - { - PointListPalette = 1, - ZWriteMustNotClear = 2, - }; - - struct Game - { - uint32 crc; - Title title; - Region region; - uint32 flags; - }; - -private: - static Game m_games[]; - static hash_map m_map; - -public: - static Game Lookup(uint32 crc); -}; diff --git a/plugins/GSdx_legacy/GSDevice.cpp b/plugins/GSdx_legacy/GSDevice.cpp deleted file mode 100644 index 23e01d0fc8..0000000000 --- a/plugins/GSdx_legacy/GSDevice.cpp +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSDevice.h" - -GSDevice::GSDevice() - : m_wnd(NULL) - , m_vsync(false) - , m_rbswapped(false) - , m_backbuffer(NULL) - , m_merge(NULL) - , m_weavebob(NULL) - , m_blend(NULL) - , m_shaderfx(NULL) - , m_fxaa(NULL) - , m_shadeboost(NULL) - , m_1x1(NULL) - , m_current(NULL) - , m_frame(0) -{ - memset(&m_vertex, 0, sizeof(m_vertex)); - memset(&m_index, 0, sizeof(m_index)); -} - -GSDevice::~GSDevice() -{ - for_each(m_pool.begin(), m_pool.end(), delete_object()); - - delete m_backbuffer; - delete m_merge; - delete m_weavebob; - delete m_blend; - delete m_shaderfx; - delete m_fxaa; - delete m_shadeboost; - delete m_1x1; -} - -bool GSDevice::Create(GSWnd* wnd) -{ - m_wnd = wnd; - - return true; -} - -bool GSDevice::Reset(int w, int h) -{ - for_each(m_pool.begin(), m_pool.end(), delete_object()); - - m_pool.clear(); - - delete m_backbuffer; - delete m_merge; - delete m_weavebob; - delete m_blend; - delete m_shaderfx; - delete m_fxaa; - delete m_shadeboost; - delete m_1x1; - - m_backbuffer = NULL; - m_merge = NULL; - m_weavebob = NULL; - m_blend = NULL; - m_shaderfx = NULL; - m_fxaa = NULL; - m_shadeboost = NULL; - m_1x1 = NULL; - - m_current = NULL; // current is special, points to other textures, no need to delete - - return m_wnd != NULL; -} - -void GSDevice::Present(const GSVector4i& r, int shader) -{ - GSVector4i cr = m_wnd->GetClientRect(); - - int w = std::max(cr.width(), 1); - int h = std::max(cr.height(), 1); - - if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) - { - if(!Reset(w, h)) - { - return; - } - } - - GL_PUSH("Present"); - - 
ClearRenderTarget(m_backbuffer, 0); - - if(m_current) - { - static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, - ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, - ShaderConvert_COMPLEX_FILTER}; // FIXME - - Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); - } - - Flip(); - - GL_POP(); -} - -void GSDevice::Present(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader) -{ - StretchRect(sTex, dTex, dRect, shader); -} - -GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format) -{ - GSVector2i size(w, h); - - for(list::iterator i = m_pool.begin(); i != m_pool.end(); i++) - { - GSTexture* t = *i; - - if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size && t->IsMSAA() == msaa) - { - m_pool.erase(i); - - return t; - } - } - - return CreateSurface(type, w, h, msaa, format); -} - -void GSDevice::PrintMemoryUsage() -{ -#ifdef ENABLE_OGL_DEBUG - uint32 pool = 0; - for(list::iterator i = m_pool.begin(); i != m_pool.end(); i++) - { - GSTexture* t = *i; - if (t) - pool += t->GetMemUsage(); - } - GL_PERF("MEM: Surface Pool %dMB", pool >> 20u); -#endif -} - -void GSDevice::EndScene() -{ - m_vertex.start += m_vertex.count; - m_vertex.count = 0; - m_index.start += m_index.count; - m_index.count = 0; -} - -void GSDevice::Recycle(GSTexture* t) -{ - if(t) - { - // FIXME: WARNING: Broken Texture Cache reuse render target without any - // cleaning (or uploading of correct gs mem data) Ofc it is wrong. If - // blending is enabled, rendering would be completely broken. However - // du to wrong invalidation of the TC it is sometimes better to reuse - // (partially) wrong data... - // - // Invalidating the data might be even worse. I'm not sure invalidating data really - // help on the perf. But people reports better perf on BDG2 (memory intensive) on OpenGL. - // It could be the reason. 
- t->Invalidate(); - - t->last_frame_used = m_frame; - - m_pool.push_front(t); - - //printf("%d\n",m_pool.size()); - - while(m_pool.size() > 300) - { - delete m_pool.back(); - - m_pool.pop_back(); - } - } -} - -void GSDevice::AgePool() -{ - m_frame++; - - while(m_pool.size() > 20 && m_frame - m_pool.back()->last_frame_used > 10) - { - delete m_pool.back(); - - m_pool.pop_back(); - } -} - -GSTexture* GSDevice::CreateRenderTarget(int w, int h, bool msaa, int format) -{ - return FetchSurface(GSTexture::RenderTarget, w, h, msaa, format); -} - -GSTexture* GSDevice::CreateDepthStencil(int w, int h, bool msaa, int format) -{ - return FetchSurface(GSTexture::DepthStencil, w, h, msaa, format); -} - -GSTexture* GSDevice::CreateTexture(int w, int h, int format) -{ - return FetchSurface(GSTexture::Texture, w, h, false, format); -} - -GSTexture* GSDevice::CreateOffscreen(int w, int h, int format) -{ - return FetchSurface(GSTexture::Offscreen, w, h, false, format); -} - -void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) -{ - StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear); -} - -GSTexture* GSDevice::GetCurrent() -{ - return m_current; -} - -void GSDevice::Merge(GSTexture* sTex[2], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c) -{ - if(m_merge == NULL || m_merge->GetSize() != fs) - { - Recycle(m_merge); - - m_merge = CreateRenderTarget(fs.x, fs.y, false); - } - - // TODO: m_1x1 - - // KH:COM crashes at startup when booting *through the bios* due to m_merge being NULL. - // (texture appears to be non-null, and is being re-created at a size around like 1700x340, - // dunno if that's relevant) -- air - - if(m_merge) - { - GSTexture* tex[2] = {NULL, NULL}; - - for(size_t i = 0; i < countof(tex); i++) - { - if(sTex[i] != NULL) - { - tex[i] = sTex[i]->IsMSAA() ? 
Resolve(sTex[i]) : sTex[i]; - } - } - - DoMerge(tex, sRect, m_merge, dRect, slbg, mmod, c); - - for(size_t i = 0; i < countof(tex); i++) - { - if(tex[i] != sTex[i]) - { - Recycle(tex[i]); - } - } - } - else - { - printf("GSdx: m_merge is NULL!\n"); - } - - m_current = m_merge; -} - -void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffset) -{ - if(m_weavebob == NULL || m_weavebob->GetSize() != ds) - { - delete m_weavebob; - - m_weavebob = CreateRenderTarget(ds.x, ds.y, false); - } - - if(mode == 0 || mode == 2) // weave or blend - { - // weave first - - DoInterlace(m_merge, m_weavebob, field, false, 0); - - if(mode == 2) - { - // blend - - if(m_blend == NULL || m_blend->GetSize() != ds) - { - delete m_blend; - - m_blend = CreateRenderTarget(ds.x, ds.y, false); - } - - DoInterlace(m_weavebob, m_blend, 2, false, 0); - - m_current = m_blend; - } - else - { - m_current = m_weavebob; - } - } - else if(mode == 1) // bob - { - DoInterlace(m_merge, m_weavebob, 3, true, yoffset * field); - - m_current = m_weavebob; - } - else - { - m_current = m_merge; - } -} - -void GSDevice::ExternalFX() -{ - GSVector2i s = m_current->GetSize(); - - if (m_shaderfx == NULL || m_shaderfx->GetSize() != s) - { - delete m_shaderfx; - m_shaderfx = CreateRenderTarget(s.x, s.y, false); - } - - if (m_shaderfx != NULL) - { - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - StretchRect(m_current, sRect, m_shaderfx, dRect, 7, false); - DoExternalFX(m_shaderfx, m_current); - } -} - -void GSDevice::FXAA() -{ - GSVector2i s = m_current->GetSize(); - - if(m_fxaa == NULL || m_fxaa->GetSize() != s) - { - delete m_fxaa; - m_fxaa = CreateRenderTarget(s.x, s.y, false); - } - - if(m_fxaa != NULL) - { - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - StretchRect(m_current, sRect, m_fxaa, dRect, 7, false); - DoFXAA(m_fxaa, m_current); - } -} - -void GSDevice::ShadeBoost() -{ - GSVector2i s = m_current->GetSize(); - - if(m_shadeboost == NULL || 
m_shadeboost->GetSize() != s) - { - delete m_shadeboost; - m_shadeboost = CreateRenderTarget(s.x, s.y, false); - } - - if(m_shadeboost != NULL) - { - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - StretchRect(m_current, sRect, m_shadeboost, dRect, 0, false); - DoShadeBoost(m_shadeboost, m_current); - } -} - -bool GSDevice::ResizeTexture(GSTexture** t, int w, int h) -{ - if(t == NULL) {ASSERT(0); return false;} - - GSTexture* t2 = *t; - - if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) - { - delete t2; - - t2 = CreateTexture(w, h); - - *t = t2; - } - - return t2 != NULL; -} - -GSAdapter::operator std::string() const -{ - char buf[sizeof "12345678:12345678:12345678:12345678"]; - sprintf(buf, "%.4X:%.4X:%.8X:%.2X", vendor, device, subsys, rev); - return buf; -} - -bool GSAdapter::operator==(const GSAdapter &desc_dxgi) const -{ - return vendor == desc_dxgi.vendor - && device == desc_dxgi.device - && subsys == desc_dxgi.subsys - && rev == desc_dxgi.rev; -} - -#ifdef _WIN32 -GSAdapter::GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi) - : vendor(desc_dxgi.VendorId) - , device(desc_dxgi.DeviceId) - , subsys(desc_dxgi.SubSysId) - , rev(desc_dxgi.Revision) -{ -} - -GSAdapter::GSAdapter(const D3DADAPTER_IDENTIFIER9 &desc_d3d9) - : vendor(desc_d3d9.VendorId) - , device(desc_d3d9.DeviceId) - , subsys(desc_d3d9.SubSysId) - , rev(desc_d3d9.Revision) -{ -} -#endif -#ifdef __linux__ -// TODO -#endif diff --git a/plugins/GSdx_legacy/GSDevice.h b/plugins/GSdx_legacy/GSDevice.h deleted file mode 100644 index 186d88b751..0000000000 --- a/plugins/GSdx_legacy/GSDevice.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSWnd.h" -#include "GSTexture.h" -#include "GSVertex.h" -#include "GSAlignedClass.h" - -enum ShaderConvert { - ShaderConvert_COPY = 0, - ShaderConvert_RGBA8_TO_16_BITS, - ShaderConvert_DATM_1, - ShaderConvert_DATM_0, - ShaderConvert_MOD_256, - ShaderConvert_SCANLINE = 5, - ShaderConvert_DIAGONAL_FILTER, - ShaderConvert_TRANSPARENCY_FILTER, - ShaderConvert_TRIANGULAR_FILTER, - ShaderConvert_COMPLEX_FILTER, - ShaderConvert_FLOAT32_TO_32_BITS = 10, - ShaderConvert_FLOAT32_TO_RGBA8, - ShaderConvert_FLOAT16_TO_RGB5A1, - ShaderConvert_RGBA8_TO_FLOAT32 = 13, - ShaderConvert_RGBA8_TO_FLOAT24, - ShaderConvert_RGBA8_TO_FLOAT16, - ShaderConvert_RGB5A1_TO_FLOAT16, - ShaderConvert_RGBA_TO_8I = 17 -}; - -#pragma pack(push, 1) - -class ConvertConstantBuffer -{ -public: - GSVector4i ScalingFactor; - - ConvertConstantBuffer() {memset(this, 0, sizeof(*this));} -}; - -class MergeConstantBuffer -{ -public: - GSVector4 BGColor; - - MergeConstantBuffer() {memset(this, 0, sizeof(*this));} -}; - -class InterlaceConstantBuffer -{ -public: - GSVector2 ZrH; - float hH; - float _pad[1]; - - InterlaceConstantBuffer() {memset(this, 0, sizeof(*this));} -}; - -class ExternalFXConstantBuffer -{ -public: - GSVector2 xyFrame; - GSVector4 rcpFrame; - GSVector4 rcpFrameOpt; - - ExternalFXConstantBuffer() { memset(this, 0, sizeof(*this)); } -}; - -class FXAAConstantBuffer -{ -public: - GSVector4 rcpFrame; - GSVector4 rcpFrameOpt; - - 
FXAAConstantBuffer() {memset(this, 0, sizeof(*this));} -}; - -class ShadeBoostConstantBuffer -{ -public: - GSVector4 rcpFrame; - GSVector4 rcpFrameOpt; - - ShadeBoostConstantBuffer() {memset(this, 0, sizeof(*this));} -}; - -#pragma pack(pop) - -class GSDevice : public GSAlignedClass<32> -{ - list m_pool; - -protected: - GSWnd* m_wnd; - bool m_vsync; - bool m_rbswapped; - GSTexture* m_backbuffer; - GSTexture* m_merge; - GSTexture* m_weavebob; - GSTexture* m_blend; - GSTexture* m_shaderfx; - GSTexture* m_fxaa; - GSTexture* m_shadeboost; - GSTexture* m_1x1; - GSTexture* m_current; - struct {size_t stride, start, count, limit;} m_vertex; - struct {size_t start, count, limit;} m_index; - unsigned int m_frame; // for ageing the pool - - virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0; - virtual GSTexture* FetchSurface(int type, int w, int h, bool msaa, int format); - - virtual void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) = 0; - virtual void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) = 0; - virtual void DoFXAA(GSTexture* sTex, GSTexture* dTex) {} - virtual void DoShadeBoost(GSTexture* sTex, GSTexture* dTex) {} - virtual void DoExternalFX(GSTexture* sTex, GSTexture* dTex) {} - -public: - GSDevice(); - virtual ~GSDevice(); - - void Recycle(GSTexture* t); - - enum {Windowed, Fullscreen, DontCare}; - - virtual bool Create(GSWnd* wnd); - virtual bool Reset(int w, int h); - virtual bool IsLost(bool update = false) {return false;} - virtual void Present(const GSVector4i& r, int shader); - virtual void Present(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader = 0); - virtual void Flip() {} - - virtual void SetVSync(bool enable) {m_vsync = enable;} - - virtual void BeginScene() {} - virtual void DrawPrimitive() {}; - virtual void DrawIndexedPrimitive() {} - virtual void DrawIndexedPrimitive(int offset, 
int count) {} - virtual void EndScene(); - - virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} - virtual void ClearRenderTarget(GSTexture* t, uint32 c) {} - virtual void ClearDepth(GSTexture* t, float c) {} - virtual void ClearStencil(GSTexture* t, uint8 c) {} - - virtual GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); - virtual GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); - virtual GSTexture* CreateTexture(int w, int h, int format = 0); - virtual GSTexture* CreateOffscreen(int w, int h, int format = 0); - - virtual GSTexture* Resolve(GSTexture* t) {return NULL;} - - virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) {return NULL;} - - virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) {} - virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true) {} - - void StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true); - - virtual void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) {} - virtual void PSSetShaderResource(int i, GSTexture* sRect) {} - virtual void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) {} - - GSTexture* GetCurrent(); - - void Merge(GSTexture* sTex[2], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c); - void Interlace(const GSVector2i& ds, int field, int mode, float yoffset); - void FXAA(); - void ShadeBoost(); - void ExternalFX(); - - bool ResizeTexture(GSTexture** t, int w, int h); - - bool IsRBSwapped() {return m_rbswapped;} - - void AgePool(); - - virtual void PrintMemoryUsage(); -}; - -struct GSAdapter -{ - uint32 vendor; - uint32 device; - uint32 subsys; - uint32 rev; - - operator std::string() const; - bool operator==(const GSAdapter&) const; - bool 
operator==(const std::string &s) const - { - return (std::string)*this == s; - } - bool operator==(const char *s) const - { - return (std::string)*this == s; - } - -#ifdef _WIN32 - GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi); - GSAdapter(const D3DADAPTER_IDENTIFIER9 &desc_d3d9); -#endif -#ifdef __linux__ - // TODO -#endif -}; diff --git a/plugins/GSdx_legacy/GSDevice11.cpp b/plugins/GSdx_legacy/GSDevice11.cpp deleted file mode 100644 index 1c4a2d8cec..0000000000 --- a/plugins/GSdx_legacy/GSDevice11.cpp +++ /dev/null @@ -1,1482 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSDevice11.h" -#include "GSUtil.h" -#include "resource.h" -#include - -GSDevice11::GSDevice11() -{ - memset(&m_state, 0, sizeof(m_state)); - memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache)); - memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache)); - - FXAA_Compiled = false; - ExShader_Compiled = false; - - m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; - m_state.bf = -1; -} - -GSDevice11::~GSDevice11() -{ -} - -bool GSDevice11::Create(GSWnd* wnd) -{ - if(!__super::Create(wnd)) - { - return false; - } - - HRESULT hr = E_FAIL; - - DXGI_SWAP_CHAIN_DESC scd; - D3D11_BUFFER_DESC bd; - D3D11_SAMPLER_DESC sd; - D3D11_DEPTH_STENCIL_DESC dsd; - D3D11_RASTERIZER_DESC rd; - D3D11_BLEND_DESC bsd; - - CComPtr adapter; - D3D_DRIVER_TYPE driver_type = D3D_DRIVER_TYPE_HARDWARE; - - std::string adapter_id = theApp.GetConfig("Adapter", "default"); - - if (adapter_id == "default") - ; - else if (adapter_id == "ref") - { - driver_type = D3D_DRIVER_TYPE_REFERENCE; - } - else - { - CComPtr dxgi_factory; - CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&dxgi_factory); - if (dxgi_factory) - for (int i = 0;; i++) - { - CComPtr enum_adapter; - if (S_OK != dxgi_factory->EnumAdapters1(i, &enum_adapter)) - break; - DXGI_ADAPTER_DESC1 desc; - hr = enum_adapter->GetDesc1(&desc); - if (S_OK == hr && GSAdapter(desc) == adapter_id) - { - adapter = enum_adapter; - driver_type = D3D_DRIVER_TYPE_UNKNOWN; - break; - } - } - } - - memset(&scd, 0, sizeof(scd)); - - scd.BufferCount = 2; - scd.BufferDesc.Width = 1; - scd.BufferDesc.Height = 1; - scd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - //scd.BufferDesc.RefreshRate.Numerator = 60; - //scd.BufferDesc.RefreshRate.Denominator = 1; - scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - scd.OutputWindow = (HWND)m_wnd->GetHandle(); - scd.SampleDesc.Count = 1; - scd.SampleDesc.Quality = 0; - - // Always start in Windowed mode. 
According to MS, DXGI just "prefers" this, and it's more or less - // required if we want to add support for dual displays later on. The fullscreen/exclusive flip - // will be issued after all other initializations are complete. - - scd.Windowed = TRUE; - - spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; - // NOTE : D3D11_CREATE_DEVICE_SINGLETHREADED - // This flag is safe as long as the DXGI's internal message pump is disabled or is on the - // same thread as the GS window (which the emulator makes sure of, if it utilizes a - // multithreaded GS). Setting the flag is a nice and easy 5% speedup on GS-intensive scenes. - - uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED; - -#ifdef DEBUG - flags |= D3D11_CREATE_DEVICE_DEBUG; -#endif - - D3D_FEATURE_LEVEL level; - - const D3D_FEATURE_LEVEL levels[] = - { - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_10_1, - D3D_FEATURE_LEVEL_10_0, - }; - - hr = D3D11CreateDeviceAndSwapChain(adapter, driver_type, NULL, flags, levels, countof(levels), D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx); - - if(FAILED(hr)) return false; - - if(!SetFeatureLevel(level, true)) - { - return false; - } - - D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS options; - - hr = m_dev->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS)); - - // msaa - - for(uint32 i = 2; i <= D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; i++) - { - uint32 quality[2] = {0, 0}; - - if(SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, i, &quality[0])) && quality[0] > 0 - && SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_D32_FLOAT_S8X24_UINT, i, &quality[1])) && quality[1] > 0) - { - m_msaa_desc.Count = i; - m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); - - if(i >= m_msaa) break; - } - } - - if(m_msaa_desc.Count == 1) - { - m_msaa = 0; - } - - // convert - - D3D11_INPUT_ELEMENT_DESC il_convert[] = - { - 
{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - - vector shader; - theApp.LoadResource(IDR_CONVERT_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "convert.fx", nullptr, "vs_main", nullptr, &m_convert.vs, il_convert, countof(il_convert), &m_convert.il); - - for(size_t i = 0; i < countof(m_convert.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "convert.fx", nullptr, format("ps_main%d", i).c_str(), nullptr, &m_convert.ps[i]); - } - - memset(&dsd, 0, sizeof(dsd)); - - dsd.DepthEnable = false; - dsd.StencilEnable = false; - - hr = m_dev->CreateDepthStencilState(&dsd, &m_convert.dss); - - memset(&bsd, 0, sizeof(bsd)); - - bsd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - - hr = m_dev->CreateBlendState(&bsd, &m_convert.bs); - - // merge - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(MergeConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.cb); - - theApp.LoadResource(IDR_MERGE_FX, shader); - for(size_t i = 0; i < countof(m_merge.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "merge.fx", nullptr, format("ps_main%d", i).c_str(), nullptr, &m_merge.ps[i]); - } - - memset(&bsd, 0, sizeof(bsd)); - - bsd.RenderTarget[0].BlendEnable = true; - bsd.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - bsd.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; - bsd.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; - bsd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - bsd.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - bsd.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - bsd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - - hr = m_dev->CreateBlendState(&bsd, &m_merge.bs); - - // interlace - - memset(&bd, 0, sizeof(bd)); 
- - bd.ByteWidth = sizeof(InterlaceConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_interlace.cb); - - theApp.LoadResource(IDR_INTERLACE_FX, shader); - for(size_t i = 0; i < countof(m_interlace.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "interlace.fx", nullptr, format("ps_main%d", i).c_str(), nullptr, &m_interlace.ps[i]); - } - - // Shade Boost - - int ShadeBoost_Contrast = theApp.GetConfig("ShadeBoost_Contrast", 50); - int ShadeBoost_Brightness = theApp.GetConfig("ShadeBoost_Brightness", 50); - int ShadeBoost_Saturation = theApp.GetConfig("ShadeBoost_Saturation", 50); - - string str[3]; - - str[0] = format("%d", ShadeBoost_Saturation); - str[1] = format("%d", ShadeBoost_Brightness); - str[2] = format("%d", ShadeBoost_Contrast); - - D3D_SHADER_MACRO macro[] = - { - {"SB_SATURATION", str[0].c_str()}, - {"SB_BRIGHTNESS", str[1].c_str()}, - {"SB_CONTRAST", str[2].c_str()}, - {NULL, NULL}, - }; - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(ShadeBoostConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_shadeboost.cb); - - theApp.LoadResource(IDR_SHADEBOOST_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "shadeboost.fx", nullptr, "ps_main", macro, &m_shadeboost.ps); - - // External fx shader - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(ExternalFXConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_shaderfx.cb); - - // Fxaa - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(FXAAConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_fxaa.cb); - - // - - memset(&rd, 0, sizeof(rd)); - - rd.FillMode = D3D11_FILL_SOLID; - rd.CullMode = 
D3D11_CULL_NONE; - rd.FrontCounterClockwise = false; - rd.DepthBias = false; - rd.DepthBiasClamp = 0; - rd.SlopeScaledDepthBias = 0; - rd.DepthClipEnable = false; // ??? - rd.ScissorEnable = true; - rd.MultisampleEnable = true; - rd.AntialiasedLineEnable = false; - - hr = m_dev->CreateRasterizerState(&rd, &m_rs); - - m_ctx->RSSetState(m_rs); - - // - - memset(&sd, 0, sizeof(sd)); - - sd.Filter = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3D11_FILTER_ANISOTROPIC : D3D11_FILTER_MIN_MAG_MIP_LINEAR; - sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.MinLOD = -FLT_MAX; - sd.MaxLOD = FLT_MAX; - sd.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - sd.ComparisonFunc = D3D11_COMPARISON_NEVER; - - hr = m_dev->CreateSamplerState(&sd, &m_convert.ln); - - sd.Filter = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3D11_FILTER_ANISOTROPIC : D3D11_FILTER_MIN_MAG_MIP_POINT; - - hr = m_dev->CreateSamplerState(&sd, &m_convert.pt); - - // - - Reset(1, 1); - - // - - CreateTextureFX(); - - // - - memset(&dsd, 0, sizeof(dsd)); - - dsd.DepthEnable = false; - dsd.StencilEnable = true; - dsd.StencilReadMask = 1; - dsd.StencilWriteMask = 1; - dsd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; - dsd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; - dsd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - dsd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; - dsd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; - dsd.BackFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; - dsd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; - - m_dev->CreateDepthStencilState(&dsd, &m_date.dss); - - D3D11_BLEND_DESC blend; - - memset(&blend, 0, sizeof(blend)); - - m_dev->CreateBlendState(&blend, &m_date.bs); - - // Exclusive/Fullscreen flip, issued for legacy (managed) windows only. 
GSopen2 style - // emulators will issue the flip themselves later on. - - if(m_wnd->IsManaged()) - { - SetExclusive(!theApp.GetConfig("windowed", 1)); - } - - return true; -} - -bool GSDevice11::Reset(int w, int h) -{ - if(!__super::Reset(w, h)) - return false; - - if(m_swapchain) - { - DXGI_SWAP_CHAIN_DESC scd; - - memset(&scd, 0, sizeof(scd)); - - m_swapchain->GetDesc(&scd); - m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0); - - CComPtr backbuffer; - - if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer))) - { - return false; - } - - m_backbuffer = new GSTexture11(backbuffer); - } - - return true; -} - -void GSDevice11::SetExclusive(bool isExcl) -{ - if(!m_swapchain) return; - - // TODO : Support for alternative display modes, by finishing this code below: - // Video mode info should be pulled form config/ini. - - /*DXGI_MODE_DESC desc; - memset(&desc, 0, sizeof(desc)); - desc.RefreshRate = 0; // must be zero for best results. - - m_swapchain->ResizeTarget(&desc); - */ - - HRESULT hr = m_swapchain->SetFullscreenState(isExcl, NULL); - - if(hr == DXGI_ERROR_NOT_CURRENTLY_AVAILABLE) - { - fprintf(stderr, "(GSdx10) SetExclusive(%s) failed; request unavailable.", isExcl ? 
"true" : "false"); - } -} - -void GSDevice11::Flip() -{ - m_swapchain->Present(m_vsync, 0); -} - -void GSDevice11::DrawPrimitive() -{ - m_ctx->Draw(m_vertex.count, m_vertex.start); -} - -void GSDevice11::DrawIndexedPrimitive() -{ - m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start); -} - -void GSDevice11::DrawIndexedPrimitive(int offset, int count) -{ - ASSERT(offset + count <= m_index.count); - - m_ctx->DrawIndexed(count, m_index.start + offset, m_vertex.start); -} - -void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z) -{ - m_ctx->Dispatch(x, y, z); -} - -void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) -{ - if (!t) return; - m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v); -} - -void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c) -{ - if (!t) return; - GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); - - m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v); -} - -void GSDevice11::ClearDepth(GSTexture* t, float c) -{ - if (!t) return; - m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, c, 0); -} - -void GSDevice11::ClearStencil(GSTexture* t, uint8 c) -{ - if (!t) return; - m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c); -} - -GSTexture* GSDevice11::CreateSurface(int type, int w, int h, bool msaa, int format) -{ - HRESULT hr; - - D3D11_TEXTURE2D_DESC desc; - - memset(&desc, 0, sizeof(desc)); - - desc.Width = w; - desc.Height = h; - desc.Format = (DXGI_FORMAT)format; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.Usage = D3D11_USAGE_DEFAULT; - - if(msaa) - { - desc.SampleDesc = m_msaa_desc; - } - - switch(type) - { - case GSTexture::RenderTarget: - desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - break; - case GSTexture::DepthStencil: - desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; - break; - case GSTexture::Texture: - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - break; - case 
GSTexture::Offscreen: - desc.Usage = D3D11_USAGE_STAGING; - desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - break; - } - - GSTexture11* t = NULL; - - CComPtr texture; - - hr = m_dev->CreateTexture2D(&desc, NULL, &texture); - - if(SUCCEEDED(hr)) - { - t = new GSTexture11(texture); - - switch(type) - { - case GSTexture::RenderTarget: - ClearRenderTarget(t, 0); - break; - case GSTexture::DepthStencil: - ClearDepth(t, 0); - break; - } - } - - return t; -} - -GSTexture* GSDevice11::CreateRenderTarget(int w, int h, bool msaa, int format) -{ - return __super::CreateRenderTarget(w, h, msaa, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); -} - -GSTexture* GSDevice11::CreateDepthStencil(int w, int h, bool msaa, int format) -{ - return __super::CreateDepthStencil(w, h, msaa, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS -} - -GSTexture* GSDevice11::CreateTexture(int w, int h, int format) -{ - return __super::CreateTexture(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); -} - -GSTexture* GSDevice11::CreateOffscreen(int w, int h, int format) -{ - return __super::CreateOffscreen(w, h, format ? 
format : DXGI_FORMAT_R8G8B8A8_UNORM); -} - -GSTexture* GSDevice11::Resolve(GSTexture* t) -{ - ASSERT(t != NULL && t->IsMSAA()); - - if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) - { - dst->SetScale(t->GetScale()); - - m_ctx->ResolveSubresource(*(GSTexture11*)dst, 0, *(GSTexture11*)t, 0, (DXGI_FORMAT)t->GetFormat()); - - return dst; - } - - return NULL; -} - -GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format, int ps_shader) -{ - GSTexture* dst = NULL; - - if(format == 0) - { - format = DXGI_FORMAT_R8G8B8A8_UNORM; - } - - if(format != DXGI_FORMAT_R8G8B8A8_UNORM && format != DXGI_FORMAT_R16_UINT) - { - ASSERT(0); - - return false; - } - - if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) - { - GSVector4 dRect(0, 0, w, h); - - if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) - { - StretchRect(src2, sRect, rt, dRect, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); - - if(src2 != src) Recycle(src2); - } - - dst = CreateOffscreen(w, h, format); - - if(dst) - { - m_ctx->CopyResource(*(GSTexture11*)dst, *(GSTexture11*)rt); - } - - Recycle(rt); - } - - return dst; -} - -void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) -{ - if(!sTex || !dTex) - { - ASSERT(0); - return; - } - - D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1}; - - m_ctx->CopySubresourceRegion(*(GSTexture11*)dTex, 0, 0, 0, 0, *(GSTexture11*)sTex, 0, &box); -} - -void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[shader], NULL, linear); -} - -void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, m_convert.bs, linear); -} - -void 
GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear) -{ - if(!sTex || !dTex) - { - ASSERT(0); - return; - } - - BeginScene(); - - GSVector2i ds = dTex->GetSize(); - - // om - - OMSetDepthStencilState(m_convert.dss, 0); - OMSetBlendState(bs, 0); - OMSetRenderTargets(dTex, NULL); - - // ia - - float left = dRect.x * 2 / ds.x - 1.0f; - float top = 1.0f - dRect.y * 2 / ds.y; - float right = dRect.z * 2 / ds.x - 1.0f; - float bottom = 1.0f - dRect.w * 2 / ds.y; - - GSVertexPT1 vertices[] = - { - {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)}, - {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)}, - {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)}, - {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)}, - }; - - - - IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); - IASetInputLayout(m_convert.il); - IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - - // vs - - VSSetShader(m_convert.vs, NULL); - - - // gs - /* NVIDIA HACK!!!! - Not sure why, but having the Geometry shader disabled causes the strange stretching in recent drivers*/ - - GSSelector sel; - //Don't use shading for stretching, we're just passing through - Note: With Win10 it seems to cause other bugs when shading is off if any of the coords is greater than 0 - //I really don't know whats going on there, but this seems to resolve it mostly (if not all, not tester a lot of games, only BIOS, FFXII and VP2) - //sel.iip = (sRect.y > 0.0f || sRect.w > 0.0f) ? 1 : 0; - //sel.prim = 2; //Triangle Strip - //SetupGS(sel); - - GSSetShader(NULL); - - /*END OF HACK*/ - - // - - // ps - - PSSetShaderResources(sTex, NULL); - PSSetSamplerState(linear ? 
m_convert.ln : m_convert.pt, NULL); - PSSetShader(ps, ps_cb); - - // - - DrawPrimitive(); - - // - - EndScene(); - - PSSetShaderResources(NULL, NULL); -} - -void GSDevice11::DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) -{ - ClearRenderTarget(dTex, c); - - if(sTex[1] && !slbg) - { - StretchRect(sTex[1], sRect[1], dTex, dRect[1], m_merge.ps[0], NULL, true); - } - - if(sTex[0]) - { - m_ctx->UpdateSubresource(m_merge.cb, 0, NULL, &c, 0, 0); - - StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge.ps[mmod ? 1 : 0], m_merge.cb, m_merge.bs, true); - } -} - -void GSDevice11::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) -{ - GSVector4 s = GSVector4(dTex->GetSize()); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); - - InterlaceConstantBuffer cb; - - cb.ZrH = GSVector2(0, 1.0f / s.y); - cb.hH = s.y / 2; - - m_ctx->UpdateSubresource(m_interlace.cb, 0, NULL, &cb, 0, 0); - - StretchRect(sTex, sRect, dTex, dRect, m_interlace.ps[shader], m_interlace.cb, linear); -} - -//Included an init function for this also. Just to be safe. 
-void GSDevice11::InitExternalFX() -{ - if (!ExShader_Compiled) - { - try { - std::string config_name(theApp.GetConfig("shaderfx_conf", "shaders/GSdx_FX_Settings.ini")); - std::ifstream fconfig(config_name); - std::stringstream shader; - if (fconfig.good()) - shader << fconfig.rdbuf() << "\n"; - else - fprintf(stderr, "GSdx: External shader config '%s' not loaded.\n", config_name.c_str()); - - std::string shader_name(theApp.GetConfig("shaderfx_glsl", "shaders/GSdx.fx")); - std::ifstream fshader(shader_name); - if (fshader.good()) - { - shader << fshader.rdbuf(); - CompileShader(shader.str().c_str(), shader.str().length(), shader_name.c_str(), nullptr, "ps_main", nullptr, &m_shaderfx.ps); - } - else - { - fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str()); - } - } - catch (GSDXRecoverableError) { - printf("GSdx: failed to compile external post-processing shader. \n"); - } - ExShader_Compiled = true; - } -} - -void GSDevice11::DoExternalFX(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ExternalFXConstantBuffer cb; - - InitExternalFX(); - - cb.xyFrame = GSVector2(s.x, s.y); - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - m_ctx->UpdateSubresource(m_shaderfx.cb, 0, NULL, &cb, 0, 0); - - StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, m_shaderfx.cb, true); -} - -// This shouldn't be necessary, we have some bug corrupting memory -// and for some reason isolating this code makes the plugin not crash -void GSDevice11::InitFXAA() -{ - if (!FXAA_Compiled) - { - try { - vector shader; - theApp.LoadResource(IDR_FXAA_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "fxaa.fx", nullptr, "ps_main", nullptr, &m_fxaa.ps); - } - catch (GSDXRecoverableError) { - printf("GSdx: failed to compile fxaa shader.\n"); - } - FXAA_Compiled = true; - } -} - -void 
GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - FXAAConstantBuffer cb; - - InitFXAA(); - - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - m_ctx->UpdateSubresource(m_fxaa.cb, 0, NULL, &cb, 0, 0); - - StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, m_fxaa.cb, true); - - //sTex->Save("c:\\temp1\\1.bmp"); - //dTex->Save("c:\\temp1\\2.bmp"); -} - -void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ShadeBoostConstantBuffer cb; - - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - m_ctx->UpdateSubresource(m_shadeboost.cb, 0, NULL, &cb, 0, 0); - - StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, m_shadeboost.cb, true); -} - -void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) -{ - // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows - - BeginScene(); - - ClearStencil(ds, 0); - - // om - - OMSetDepthStencilState(m_date.dss, 1); - OMSetBlendState(m_date.bs, 0); - OMSetRenderTargets(NULL, ds); - - // ia - - IASetVertexBuffer(vertices, sizeof(vertices[0]), 4); - IASetInputLayout(m_convert.il); - IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - - // vs - - VSSetShader(m_convert.vs, NULL); - - // gs - - GSSetShader(NULL); - - // ps - - GSTexture* rt2 = rt->IsMSAA() ? Resolve(rt) : rt; - - PSSetShaderResources(rt2, NULL); - PSSetSamplerState(m_convert.pt, NULL); - PSSetShader(m_convert.ps[datm ? 
2 : 3], NULL); - - // - - DrawPrimitive(); - - // - - EndScene(); - - if(rt2 != rt) Recycle(rt2); -} - -void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) -{ - void* ptr = NULL; - - if(IAMapVertexBuffer(&ptr, stride, count)) - { - GSVector4i::storent(ptr, vertex, count * stride); - - IAUnmapVertexBuffer(); - } -} - -bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) -{ - ASSERT(m_vertex.count == 0); - - if(count * stride > m_vertex.limit * m_vertex.stride) - { - m_vb_old = m_vb; - m_vb = NULL; - - m_vertex.start = 0; - m_vertex.limit = std::max(count * 3 / 2, 11000); - } - - if(m_vb == NULL) - { - D3D11_BUFFER_DESC bd; - - memset(&bd, 0, sizeof(bd)); - - bd.Usage = D3D11_USAGE_DYNAMIC; - bd.ByteWidth = m_vertex.limit * stride; - bd.BindFlags = D3D11_BIND_VERTEX_BUFFER; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - - HRESULT hr; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); - - if(FAILED(hr)) return false; - } - - D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; - - if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) - { - m_vertex.start = 0; - - type = D3D11_MAP_WRITE_DISCARD; - } - - D3D11_MAPPED_SUBRESOURCE m; - - if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m))) - { - return false; - } - - *vertex = (uint8*)m.pData + m_vertex.start * stride; - - m_vertex.count = count; - m_vertex.stride = stride; - - return true; -} - -void GSDevice11::IAUnmapVertexBuffer() -{ - m_ctx->Unmap(m_vb, 0); - - IASetVertexBuffer(m_vb, m_vertex.stride); -} - -void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) -{ - if(m_state.vb != vb || m_state.vb_stride != stride) - { - m_state.vb = vb; - m_state.vb_stride = stride; - - uint32 stride2 = stride; - uint32 offset = 0; - - m_ctx->IASetVertexBuffers(0, 1, &vb, &stride2, &offset); - } -} - -void GSDevice11::IASetIndexBuffer(const void* index, size_t count) -{ - ASSERT(m_index.count == 0); - - if(count > m_index.limit) - { - m_ib_old = m_ib; - 
m_ib = NULL; - - m_index.start = 0; - m_index.limit = std::max(count * 3 / 2, 11000); - } - - if(m_ib == NULL) - { - D3D11_BUFFER_DESC bd; - - memset(&bd, 0, sizeof(bd)); - - bd.Usage = D3D11_USAGE_DYNAMIC; - bd.ByteWidth = m_index.limit * sizeof(uint32); - bd.BindFlags = D3D11_BIND_INDEX_BUFFER; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - - HRESULT hr; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_ib); - - if(FAILED(hr)) return; - } - - D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; - - if(m_index.start + count > m_index.limit) - { - m_index.start = 0; - - type = D3D11_MAP_WRITE_DISCARD; - } - - D3D11_MAPPED_SUBRESOURCE m; - - if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m))) - { - memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32)); - - m_ctx->Unmap(m_ib, 0); - } - - m_index.count = count; - - IASetIndexBuffer(m_ib); -} - -void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib) -{ - if(m_state.ib != ib) - { - m_state.ib = ib; - - m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0); - } -} - -void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) -{ - if(m_state.layout != layout) - { - m_state.layout = layout; - - m_ctx->IASetInputLayout(layout); - } -} - -void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) -{ - if(m_state.topology != topology) - { - m_state.topology = topology; - - m_ctx->IASetPrimitiveTopology(topology); - } -} - -void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) -{ - if(m_state.vs != vs) - { - m_state.vs = vs; - - m_ctx->VSSetShader(vs, NULL, 0); - } - - if(m_state.vs_cb != vs_cb) - { - m_state.vs_cb = vs_cb; - - m_ctx->VSSetConstantBuffers(0, 1, &vs_cb); - } -} - -void GSDevice11::GSSetShader(ID3D11GeometryShader* gs) -{ - if(m_state.gs != gs) - { - m_state.gs = gs; - - m_ctx->GSSetShader(gs, NULL, 0); - } -} - -void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) -{ - PSSetShaderResource(0, sr0); - PSSetShaderResource(1, sr1); - - for(size_t 
i = 2; i < countof(m_state.ps_srv); i++) - { - PSSetShaderResource(i, NULL); - } -} - -void GSDevice11::PSSetShaderResource(int i, GSTexture* sr) -{ - ID3D11ShaderResourceView* srv = NULL; - - if(sr) srv = *(GSTexture11*)sr; - - PSSetShaderResourceView(i, srv); -} - -void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv) -{ - ASSERT(i < countof(m_state.ps_srv)); - - if(m_state.ps_srv[i] != srv) - { - m_state.ps_srv[i] = srv; - - m_srv_changed = true; - } -} - -void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2) -{ - if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1 || m_state.ps_ss[2] != ss2) - { - m_state.ps_ss[0] = ss0; - m_state.ps_ss[1] = ss1; - m_state.ps_ss[2] = ss2; - - m_ss_changed = true; - } -} - -void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) -{ - if(m_state.ps != ps) - { - m_state.ps = ps; - - m_ctx->PSSetShader(ps, NULL, 0); - } - - if(m_srv_changed) - { - m_ctx->PSSetShaderResources(0, countof(m_state.ps_srv), m_state.ps_srv); - - m_srv_changed = false; - } - - if(m_ss_changed) - { - m_ctx->PSSetSamplers(0, countof(m_state.ps_ss), m_state.ps_ss); - - m_ss_changed = false; - } - - if(m_state.ps_cb != ps_cb) - { - m_state.ps_cb = ps_cb; - - m_ctx->PSSetConstantBuffers(0, 1, &ps_cb); - } -} - -void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv) -{ - if(m_state.cs_srv[i] != srv) - { - m_state.cs_srv[i] = srv; - - m_ctx->CSSetShaderResources(i, 1, &srv); - } -} - -void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav) -{ - uint32 counters[8]; - - memset(counters, 0, sizeof(counters)); - - m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, counters); -} - -void GSDevice11::CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb) -{ - if(m_state.cs != cs) - { - m_state.cs = cs; - - m_ctx->CSSetShader(cs, NULL, 0); - } - - if(m_state.cs_cb != cs_cb) - { - m_state.cs_cb = cs_cb; - - m_ctx->CSSetConstantBuffers(0, 1, 
&cs_cb); - } -} - -void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref) -{ - if(m_state.dss != dss || m_state.sref != sref) - { - m_state.dss = dss; - m_state.sref = sref; - - m_ctx->OMSetDepthStencilState(dss, sref); - } -} - -void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf) -{ - if(m_state.bs != bs || m_state.bf != bf) - { - m_state.bs = bs; - m_state.bf = bf; - - float BlendFactor[] = {bf, bf, bf, 0}; - - m_ctx->OMSetBlendState(bs, BlendFactor, 0xffffffff); - } -} - -void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) -{ - ID3D11RenderTargetView* rtv = NULL; - ID3D11DepthStencilView* dsv = NULL; - - if (!rt && !ds) - throw GSDXRecoverableError(); - - if(rt) rtv = *(GSTexture11*)rt; - if(ds) dsv = *(GSTexture11*)ds; - - if(m_state.rtv != rtv || m_state.dsv != dsv) - { - m_state.rtv = rtv; - m_state.dsv = dsv; - - m_ctx->OMSetRenderTargets(1, &rtv, dsv); - } - - GSVector2i size = rt ? rt->GetSize() : ds->GetSize(); - if(m_state.viewport != size) - { - bool isNative = theApp.GetConfig("upscale_multiplier", 1) == 1; - m_state.viewport = size; - - D3D11_VIEWPORT vp; - - memset(&vp, 0, sizeof(vp)); - - vp.TopLeftX = (spritehack > 0 || isNative) ? 0.0f : -0.01f; - vp.TopLeftY = (spritehack > 0 || isNative) ? 0.0f : -0.01f; - vp.Width = (float)size.x; - vp.Height = (float)size.y; - vp.MinDepth = 0.0f; - vp.MaxDepth = 1.0f; - - m_ctx->RSSetViewports(1, &vp); - } - - GSVector4i r = scissor ? 
*scissor : GSVector4i(size).zwxy(); - - if(!m_state.scissor.eq(r)) - { - m_state.scissor = r; - - m_ctx->RSSetScissorRects(1, r); - } -} - -void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor) -{ - m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, counters); - - m_state.rtv = NULL; - m_state.dsv = NULL; - - if(m_state.viewport != rtsize) - { - m_state.viewport = rtsize; - - D3D11_VIEWPORT vp; - - memset(&vp, 0, sizeof(vp)); - - vp.TopLeftX = 0; - vp.TopLeftY = 0; - vp.Width = (float)rtsize.x; - vp.Height = (float)rtsize.y; - vp.MinDepth = 0.0f; - vp.MaxDepth = 1.0f; - - m_ctx->RSSetViewports(1, &vp); - } - - GSVector4i r = scissor ? *scissor : GSVector4i(rtsize).zwxy(); - - if(!m_state.scissor.eq(r)) - { - m_state.scissor = r; - - m_ctx->RSSetScissorRects(1, r); - } -} - -void GSDevice11::CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il) -{ - HRESULT hr; - - vector m; - - PrepareShaderMacro(m, macro); - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll? 
nullptr : include, entry, m_shader.vs.c_str(), 0, 0, &shader, &error); - - if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateVertexShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, vs); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateInputLayout(layout, count, shader->GetBufferPointer(), shader->GetBufferSize(), il); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} - -void GSDevice11::CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs) -{ - HRESULT hr; - - vector m; - - PrepareShaderMacro(m, macro); - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll ? nullptr : include, entry, m_shader.gs.c_str(), 0, 0, &shader, &error); - - if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateGeometryShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, gs); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} - -void GSDevice11::CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs, D3D11_SO_DECLARATION_ENTRY* layout, int count) -{ - HRESULT hr; - - vector m; - - PrepareShaderMacro(m, macro); - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll ? 
nullptr : include, entry, m_shader.gs.c_str(), 0, 0, &shader, &error); - - if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateGeometryShaderWithStreamOutput((void*)shader->GetBufferPointer(), shader->GetBufferSize(), layout, count, NULL, 0, D3D11_SO_NO_RASTERIZED_STREAM, NULL, gs); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} - -void GSDevice11::CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11PixelShader** ps) -{ - HRESULT hr; - - vector m; - - PrepareShaderMacro(m, macro); - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll ? nullptr : include, entry, m_shader.ps.c_str(), 0, 0, &shader, &error); - - if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreatePixelShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, ps); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} - -void GSDevice11::CompileShader(const char* source, size_t size, const char *fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11ComputeShader** cs) -{ - HRESULT hr; - - vector m; - - PrepareShaderMacro(m, macro); - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll ? 
nullptr : include, entry, m_shader.cs.c_str(), 0, 0, &shader, &error); - - if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, cs); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} diff --git a/plugins/GSdx_legacy/GSDevice11.h b/plugins/GSdx_legacy/GSDevice11.h deleted file mode 100644 index 06ef75f230..0000000000 --- a/plugins/GSdx_legacy/GSDevice11.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDeviceDX.h" -#include "GSTexture11.h" - -struct GSVertexShader11 -{ - CComPtr vs; - CComPtr il; -}; - -class GSDevice11 : public GSDeviceDX -{ - GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format); - - void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c); - void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0); - void DoFXAA(GSTexture* sTex, GSTexture* dTex); - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex); - void DoExternalFX(GSTexture* sTex, GSTexture* dTex); - - void InitExternalFX(); - void InitFXAA(); // Bug workaround! Stack corruption? Heap corruption? No idea - - // - - CComPtr m_dev; - CComPtr m_ctx; - CComPtr m_swapchain; - CComPtr m_vb; - CComPtr m_vb_old; - CComPtr m_ib; - CComPtr m_ib_old; - - bool m_srv_changed, m_ss_changed; - int spritehack; - - struct - { - ID3D11Buffer* vb; - size_t vb_stride; - ID3D11Buffer* ib; - ID3D11InputLayout* layout; - D3D11_PRIMITIVE_TOPOLOGY topology; - ID3D11VertexShader* vs; - ID3D11Buffer* vs_cb; - ID3D11GeometryShader* gs; - ID3D11ShaderResourceView* ps_srv[16]; - ID3D11PixelShader* ps; - ID3D11Buffer* ps_cb; - ID3D11SamplerState* ps_ss[3]; - ID3D11ShaderResourceView* cs_srv[16]; - ID3D11ComputeShader* cs; - ID3D11Buffer* cs_cb; - GSVector2i viewport; - GSVector4i scissor; - ID3D11DepthStencilState* dss; - uint8 sref; - ID3D11BlendState* bs; - float bf; - ID3D11RenderTargetView* rtv; - ID3D11DepthStencilView* dsv; - } m_state; - -public: // TODO - CComPtr m_rs; - - bool FXAA_Compiled; - bool ExShader_Compiled; - - struct - { - CComPtr il; - CComPtr vs; - CComPtr ps[10]; - CComPtr ln; - CComPtr pt; - CComPtr dss; - CComPtr bs; - } m_convert; - - struct - { - CComPtr ps[2]; - CComPtr cb; - CComPtr bs; - } m_merge; - - struct - { - CComPtr ps[4]; - CComPtr cb; - } m_interlace; - - struct - { - CComPtr 
ps; - CComPtr cb; - } m_shaderfx; - - struct - { - CComPtr ps; - CComPtr cb; - } m_fxaa; - - struct - { - CComPtr ps; - CComPtr cb; - } m_shadeboost; - - struct - { - CComPtr dss; - CComPtr bs; - } m_date; - - void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm); - - // Shaders... - - hash_map m_vs; - CComPtr m_vs_cb; - hash_map > m_gs; - hash_map > m_ps; - CComPtr m_ps_cb; - hash_map > m_ps_ss; - CComPtr m_palette_ss; - CComPtr m_rt_ss; - hash_map > m_om_dss; - hash_map > m_om_bs; - - VSConstantBuffer m_vs_cb_cache; - PSConstantBuffer m_ps_cb_cache; - - bool CreateTextureFX(); - -public: - GSDevice11(); - virtual ~GSDevice11(); - - bool Create(GSWnd* wnd); - bool Reset(int w, int h); - void Flip(); - - void SetExclusive(bool isExcl); - - void DrawPrimitive(); - void DrawIndexedPrimitive(); - void DrawIndexedPrimitive(int offset, int count); - void Dispatch(uint32 x, uint32 y, uint32 z); - - void ClearRenderTarget(GSTexture* t, const GSVector4& c); - void ClearRenderTarget(GSTexture* t, uint32 c); - void ClearDepth(GSTexture* t, float c); - void ClearStencil(GSTexture* t, uint8 c); - - GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); - GSTexture* CreateTexture(int w, int h, int format = 0); - GSTexture* CreateOffscreen(int w, int h, int format = 0); - - GSTexture* Resolve(GSTexture* t); - - GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0); - - void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r); - - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, 
GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true); - - void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); - bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count); - void IAUnmapVertexBuffer(); - void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride); - void IASetIndexBuffer(const void* index, size_t count); - void IASetIndexBuffer(ID3D11Buffer* ib); - void IASetInputLayout(ID3D11InputLayout* layout); - void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology); - void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb); - void GSSetShader(ID3D11GeometryShader* gs); - void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1); - void PSSetShaderResource(int i, GSTexture* sr); - void PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv); - void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb); - void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL); - void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv); - void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav); - void CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb); - void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref); - void OMSetBlendState(ID3D11BlendState* bs, float bf); - void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); - void OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor = NULL); - - void SetupVS(VSSelector sel, const VSConstantBuffer* cb); - void SetupGS(GSSelector sel); - void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); - void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); - - bool HasStencil() { return true; } - bool HasDepth32() { return true; } - - ID3D11Device* operator->() {return m_dev;} - operator 
ID3D11Device*() {return m_dev;} - operator ID3D11DeviceContext*() {return m_ctx;} - - void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il); - void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs); - void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs, D3D11_SO_DECLARATION_ENTRY* layout, int count); - void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11PixelShader** ps); - void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11ComputeShader** cs); -}; - diff --git a/plugins/GSdx_legacy/GSDevice9.cpp b/plugins/GSdx_legacy/GSDevice9.cpp deleted file mode 100644 index 1414d07fd7..0000000000 --- a/plugins/GSdx_legacy/GSDevice9.cpp +++ /dev/null @@ -1,1523 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSDevice9.h" -#include "resource.h" -#include - -GSDevice9::GSDevice9() - : m_lost(false) -{ - m_rbswapped = true; - FXAA_Compiled = false; - ExShader_Compiled = false; - - - memset(&m_pp, 0, sizeof(m_pp)); - memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); - memset(&m_state, 0, sizeof(m_state)); - - m_state.bf = 0xffffffff; -} - -GSDevice9::~GSDevice9() -{ - for_each(m_om_bs.begin(), m_om_bs.end(), delete_second()); - for_each(m_om_dss.begin(), m_om_dss.end(), delete_second()); - for_each(m_ps_ss.begin(), m_ps_ss.end(), delete_second()); - for_each(m_mskfix.begin(), m_mskfix.end(), delete_second()); - - if(m_state.vs_cb) _aligned_free(m_state.vs_cb); - if(m_state.ps_cb) _aligned_free(m_state.ps_cb); -} - -static void FindAdapter(IDirect3D9 *d3d9, UINT &adapter, D3DDEVTYPE &devtype, std::string adapter_id = "") -{ - adapter = D3DADAPTER_DEFAULT; - devtype = D3DDEVTYPE_HAL; - - if (!adapter_id.length()) - adapter_id = theApp.GetConfig("Adapter", "default"); - - if (adapter_id == "default") - ; - else if (adapter_id == "ref") - { - devtype = D3DDEVTYPE_REF; - } - else - { - int n = d3d9->GetAdapterCount(); - for (int i = 0; i < n; i++) - { - D3DADAPTER_IDENTIFIER9 id; - if (D3D_OK != d3d9->GetAdapterIdentifier(i, 0, &id)) - break; - if (GSAdapter(id) == adapter_id) - { - adapter = i; - devtype = D3DDEVTYPE_HAL; - break; - } - } - } -} - -// if supported and null != msaa_desc, msaa_desc will contain requested Count and Quality - -static bool IsMsaaSupported(IDirect3D9* d3d, UINT adapter, D3DDEVTYPE devtype, D3DFORMAT depth_format, uint32 msaaCount, DXGI_SAMPLE_DESC* msaa_desc = NULL) -{ - if(msaaCount > 16) return false; - - D3DCAPS9 d3dcaps; - - memset(&d3dcaps, 0, sizeof(d3dcaps)); - - d3d->GetDeviceCaps(adapter, devtype, &d3dcaps); - - DWORD 
quality[2] = {0, 0}; - - if(SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[0])) && quality[0] > 0 - && SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, depth_format, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[1])) && quality[1] > 0) - { - if(msaa_desc) - { - msaa_desc->Count = msaaCount; - msaa_desc->Quality = std::min(quality[0] - 1, quality[1] - 1); - } - - return true; - } - - return false; -} - -static bool TestDepthFormat(IDirect3D9* d3d, UINT adapter, D3DDEVTYPE devtype, D3DFORMAT format) -{ - if(FAILED(d3d->CheckDeviceFormat(adapter, devtype, D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, format))) - { - return false; - } - - if(FAILED(d3d->CheckDepthStencilMatch(adapter, devtype, D3DFMT_X8R8G8B8, D3DFMT_X8R8G8B8, format))) - { - return false; - } - - return true; -} - -static D3DFORMAT BestD3dFormat(IDirect3D9* d3d, UINT adapter, D3DDEVTYPE devtype, int msaaCount = 0, DXGI_SAMPLE_DESC* msaa_desc = NULL) -{ - // In descending order of preference - - static D3DFORMAT fmts[] = - { - D3DFMT_D32, - D3DFMT_D32F_LOCKABLE, - D3DFMT_D24S8 - }; - - if(1 == msaaCount) msaaCount = 0; - - for(size_t i = 0; i < countof(fmts); i++) - { - if(TestDepthFormat(d3d, adapter, devtype, fmts[i]) && (!msaaCount || IsMsaaSupported(d3d, adapter, devtype, fmts[i], msaaCount, msaa_desc))) - { - return fmts[i]; - } - } - - return D3DFMT_UNKNOWN; -} - -// return: 32, 24, or 0 if not supported. 
if 1==msaa, considered as msaa=0 - -uint32 GSDevice9::GetMaxDepth(uint32 msaa, std::string adapter_id) -{ - CComPtr d3d; - - d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION)); - - UINT adapter; - D3DDEVTYPE devtype; - - FindAdapter(d3d, adapter, devtype, adapter_id); - - switch(BestD3dFormat(d3d, adapter, devtype, msaa)) - { - case D3DFMT_D32: - case D3DFMT_D32F_LOCKABLE: - return 32; - case D3DFMT_D24S8: - return 24; - } - - return 0; -} - -void GSDevice9::ForceValidMsaaConfig() -{ - if(0 == GetMaxDepth(theApp.GetConfig("UserHacks_MSAA", 0))) - { - theApp.SetConfig("UserHacks_MSAA", 0); // replace invalid msaa value in ini file with 0. - } -}; - -bool GSDevice9::Create(GSWnd* wnd) -{ - if(!__super::Create(wnd)) - { - return false; - } - - // d3d - - m_d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION)); - - if(!m_d3d) return false; - - UINT adapter; - D3DDEVTYPE devtype; - - FindAdapter(m_d3d, adapter, devtype); - - D3DADAPTER_IDENTIFIER9 id; - - if(S_OK == m_d3d->GetAdapterIdentifier(adapter, 0, &id)) - { - printf("%s (%d.%d.%d.%d)\n", - id.Description, - id.DriverVersion.HighPart >> 16, - id.DriverVersion.HighPart & 0xffff, - id.DriverVersion.LowPart >> 16, - id.DriverVersion.LowPart & 0xffff); - } - - ForceValidMsaaConfig(); - - // Get best format/depth for msaa. Assumption is that if the resulting depth is 24 instead of possible 32, - // the user was already warned when she selected it. (Lower res z buffer without warning is unacceptable). - - m_depth_format = BestD3dFormat(m_d3d, adapter, devtype, m_msaa, &m_msaa_desc); - - if(D3DFMT_UNKNOWN == m_depth_format) - { - // can't find a format with requested msaa, try without. 
- - m_depth_format = BestD3dFormat(m_d3d, adapter, devtype, 0); - - if(D3DFMT_UNKNOWN == m_depth_format) - { - return false; - } - - m_msaa = 0; - } - - memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); - - m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps); - - // - - if(m_d3dcaps.VertexShaderVersion < (m_d3dcaps.PixelShaderVersion & ~0x10000)) - { - if(m_d3dcaps.VertexShaderVersion > D3DVS_VERSION(0, 0)) - { - ASSERT(0); - - return false; - } - - // else vertex shader should be emulated in software (gma950) - } - - m_d3dcaps.VertexShaderVersion = m_d3dcaps.PixelShaderVersion & ~0x10000; - - if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(3, 0)) - { - SetFeatureLevel(D3D_FEATURE_LEVEL_9_3, false); - } - else if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(2, 0)) - { - SetFeatureLevel(D3D_FEATURE_LEVEL_9_2, false); - } - else - { - string s = format( - "Supported pixel shader version is too low!\n\nSupported: %d.%d\nNeeded: 2.0 or higher", - D3DSHADER_VERSION_MAJOR(m_d3dcaps.PixelShaderVersion), D3DSHADER_VERSION_MINOR(m_d3dcaps.PixelShaderVersion)); - - MessageBox(NULL, s.c_str(), "GSdx", MB_OK); - - return false; - } - - if(!Reset(1, 1)) - { - return false; - } - - m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); - - // convert - - static const D3DVERTEXELEMENT9 il_convert[] = - { - {0, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0}, - {0, 16, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0}, - D3DDECL_END() - }; - - vector shader; - theApp.LoadResource(IDR_CONVERT_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "convert.fx", "vs_main", nullptr, &m_convert.vs, il_convert, countof(il_convert), &m_convert.il); - - for(size_t i = 0; i < countof(m_convert.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "convert.fx", format("ps_main%d", i), nullptr, &m_convert.ps[i]); - } - - m_convert.dss.DepthEnable = false; - m_convert.dss.StencilEnable = false; - - 
m_convert.bs.BlendEnable = false; - m_convert.bs.RenderTargetWriteMask = D3DCOLORWRITEENABLE_RGBA; - D3DTEXTUREFILTERTYPE LinearToAnisotropic = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3DTEXF_ANISOTROPIC : D3DTEXF_LINEAR; - D3DTEXTUREFILTERTYPE PointToAnisotropic = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3DTEXF_ANISOTROPIC : D3DTEXF_POINT; - - m_convert.ln.FilterMin[0] = LinearToAnisotropic; - m_convert.ln.FilterMag[0] = LinearToAnisotropic; - m_convert.ln.FilterMin[1] = LinearToAnisotropic; - m_convert.ln.FilterMag[1] = LinearToAnisotropic; - m_convert.ln.AddressU = D3DTADDRESS_CLAMP; - m_convert.ln.AddressV = D3DTADDRESS_CLAMP; - m_convert.ln.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - - m_convert.pt.FilterMin[0] = PointToAnisotropic; - m_convert.pt.FilterMag[0] = PointToAnisotropic; - m_convert.pt.FilterMin[1] = PointToAnisotropic; - m_convert.pt.FilterMag[1] = PointToAnisotropic; - m_convert.pt.AddressU = D3DTADDRESS_CLAMP; - m_convert.pt.AddressV = D3DTADDRESS_CLAMP; - m_convert.pt.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - - // merge - - theApp.LoadResource(IDR_MERGE_FX, shader); - for(size_t i = 0; i < countof(m_merge.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "merge.fx", format("ps_main%d", i), nullptr, &m_merge.ps[i]); - } - - m_merge.bs.BlendEnable = true; - m_merge.bs.BlendOp = D3DBLENDOP_ADD; - m_merge.bs.SrcBlend = D3DBLEND_SRCALPHA; - m_merge.bs.DestBlend = D3DBLEND_INVSRCALPHA; - m_merge.bs.BlendOpAlpha = D3DBLENDOP_ADD; - m_merge.bs.SrcBlendAlpha = D3DBLEND_ONE; - m_merge.bs.DestBlendAlpha = D3DBLEND_ZERO; - m_merge.bs.RenderTargetWriteMask = D3DCOLORWRITEENABLE_RGBA; - - // interlace - - theApp.LoadResource(IDR_INTERLACE_FX, shader); - for(size_t i = 0; i < countof(m_interlace.ps); i++) - { - CompileShader((const char *)shader.data(), shader.size(), "interlace.fx", format("ps_main%d", i), nullptr, &m_interlace.ps[i]); - } - - 
// Shade Boost - - int ShadeBoost_Contrast = theApp.GetConfig("ShadeBoost_Contrast", 50); - int ShadeBoost_Brightness = theApp.GetConfig("ShadeBoost_Brightness", 50); - int ShadeBoost_Saturation = theApp.GetConfig("ShadeBoost_Saturation", 50); - - string str[3]; - - str[0] = format("%d", ShadeBoost_Saturation); - str[1] = format("%d", ShadeBoost_Brightness); - str[2] = format("%d", ShadeBoost_Contrast); - - D3D_SHADER_MACRO macro[] = - { - {"SB_SATURATION", str[0].c_str()}, - {"SB_BRIGHTNESS", str[1].c_str()}, - {"SB_CONTRAST", str[2].c_str()}, - {NULL, NULL}, - }; - - theApp.LoadResource(IDR_SHADEBOOST_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "shadeboost.fx", "ps_main", macro, &m_shadeboost.ps); - - // create shader layout - - VSSelector sel; - VSConstantBuffer cb; - - SetupVS(sel, &cb); - - // - - memset(&m_date.dss, 0, sizeof(m_date.dss)); - - m_date.dss.StencilEnable = true; - m_date.dss.StencilReadMask = 1; - m_date.dss.StencilWriteMask = 1; - m_date.dss.StencilFunc = D3DCMP_ALWAYS; - m_date.dss.StencilPassOp = D3DSTENCILOP_REPLACE; - m_date.dss.StencilRef = 1; - - memset(&m_date.bs, 0, sizeof(m_date.bs)); - - // - - return true; -} - -bool GSDevice9::Reset(int w, int h) -{ - if(!__super::Reset(w, h)) - return false; - - HRESULT hr; - - int mode = (!m_wnd->IsManaged() || theApp.GetConfig("windowed", 1)) ? Windowed : Fullscreen; - - if(mode == DontCare) - { - mode = m_pp.Windowed ? Windowed : Fullscreen; - } - - if(!m_lost) - { - if(m_swapchain && mode != Fullscreen && m_pp.Windowed) - { - m_swapchain = NULL; - - m_pp.BackBufferWidth = w; - m_pp.BackBufferHeight = h; - m_pp.PresentationInterval = m_vsync ? 
D3DPRESENT_INTERVAL_ONE : D3DPRESENT_INTERVAL_IMMEDIATE; - - hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain); - - if(FAILED(hr)) return false; - - CComPtr backbuffer; - hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer); - m_backbuffer = new GSTexture9(backbuffer); - - return true; - } - } - - m_swapchain = NULL; - - m_vb = NULL; - m_vb_old = NULL; - - m_vertex.start = 0; - m_vertex.count = 0; - m_index.start = 0; - m_index.count = 0; - - if(m_state.vs_cb) _aligned_free(m_state.vs_cb); - if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - - memset(&m_state, 0, sizeof(m_state)); - - m_state.bf = 0xffffffff; - - memset(&m_pp, 0, sizeof(m_pp)); - - m_pp.Windowed = TRUE; - m_pp.hDeviceWindow = (HWND)m_wnd->GetHandle(); - m_pp.SwapEffect = D3DSWAPEFFECT_FLIP; - m_pp.BackBufferFormat = D3DFMT_X8R8G8B8; - m_pp.BackBufferWidth = 1; - m_pp.BackBufferHeight = 1; - m_pp.PresentationInterval = m_vsync ? D3DPRESENT_INTERVAL_ONE : D3DPRESENT_INTERVAL_IMMEDIATE; - - // m_pp.Flags |= D3DPRESENTFLAG_VIDEO; // enables tv-out (but I don't think anyone would still use a regular tv...) - - int mw = theApp.GetConfig("ModeWidth", 0); - int mh = theApp.GetConfig("ModeHeight", 0); - int mrr = theApp.GetConfig("ModeRefreshRate", 0); - - if(m_wnd->IsManaged() && mode == Fullscreen && mw > 0 && mh > 0 && mrr >= 0) - { - m_pp.Windowed = FALSE; - m_pp.BackBufferWidth = mw; - m_pp.BackBufferHeight = mh; - // m_pp.FullScreen_RefreshRateInHz = mrr; - - m_wnd->HideFrame(); - } - - if(!m_dev) - { - uint32 flags = m_d3dcaps.VertexProcessingCaps ? 
D3DCREATE_HARDWARE_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING; - - if(flags & D3DCREATE_HARDWARE_VERTEXPROCESSING) - { - flags |= D3DCREATE_PUREDEVICE; - } - - hr = m_d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, (HWND)m_wnd->GetHandle(), flags, &m_pp, &m_dev); - - if(FAILED(hr)) return false; - } - else - { - hr = m_dev->Reset(&m_pp); - - if(FAILED(hr)) - { - if(D3DERR_DEVICELOST == hr) - { - Sleep(1000); - - hr = m_dev->Reset(&m_pp); - } - - if(FAILED(hr)) return false; - } - } - - if(m_pp.Windowed) - { - m_pp.BackBufferWidth = 1; - m_pp.BackBufferHeight = 1; - - hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain); - - if(FAILED(hr)) return false; - } - - CComPtr backbuffer; - - if(m_swapchain) - { - hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer); - } - else - { - hr = m_dev->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &backbuffer); - } - - m_backbuffer = new GSTexture9(backbuffer); - - m_dev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); - m_dev->SetRenderState(D3DRS_LIGHTING, FALSE); - m_dev->SetRenderState(D3DRS_ALPHATESTENABLE, FALSE); - m_dev->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); - - return true; -} - -bool GSDevice9::IsLost(bool update) -{ - if(!m_lost || update) - { - HRESULT hr = m_dev->TestCooperativeLevel(); - - m_lost = hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET; - } - - return m_lost; -} - -void GSDevice9::Flip() -{ - m_dev->EndScene(); - - HRESULT hr; - - if(m_swapchain) - { - hr = m_swapchain->Present(NULL, NULL, NULL, NULL, 0); - } - else - { - hr = m_dev->Present(NULL, NULL, NULL, NULL); - } - - m_dev->BeginScene(); - - if(FAILED(hr)) - { - m_lost = true; - } -} - -void GSDevice9::SetVSync(bool enable) -{ - if(m_vsync == enable) return; - - __super::SetVSync(enable); - - // Clever trick: Delete the backbuffer, so that the next Present will fail and - // cause a DXDevice9::Reset call, which re-creates the backbuffer with current - // vsync settings. 
:) - - delete m_backbuffer; - - m_backbuffer = NULL; -} - -void GSDevice9::BeginScene() -{ - // m_dev->BeginScene(); -} - -void GSDevice9::DrawPrimitive() -{ - int prims = 0; - - switch(m_state.topology) - { - case D3DPT_POINTLIST: - prims = m_vertex.count; - break; - case D3DPT_LINELIST: - prims = m_vertex.count / 2; - break; - case D3DPT_LINESTRIP: - prims = m_vertex.count - 1; - break; - case D3DPT_TRIANGLELIST: - prims = m_vertex.count / 3; - break; - case D3DPT_TRIANGLESTRIP: - case D3DPT_TRIANGLEFAN: - prims = m_vertex.count - 2; - break; - default: - __assume(0); - } - - m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims); -} - -void GSDevice9::DrawIndexedPrimitive() -{ - int prims = 0; - - switch(m_state.topology) - { - case D3DPT_POINTLIST: - prims = m_index.count; - break; - case D3DPT_LINELIST: - case D3DPT_LINESTRIP: - prims = m_index.count / 2; - break; - case D3DPT_TRIANGLELIST: - case D3DPT_TRIANGLESTRIP: - case D3DPT_TRIANGLEFAN: - prims = m_index.count / 3; - break; - default: - __assume(0); - } - - m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_index.count, m_index.start, prims); -} - -void GSDevice9::EndScene() -{ - // m_dev->EndScene(); - - __super::EndScene(); -} - -void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c) -{ - if (!t) return; - ClearRenderTarget(t, (c * 255 + 0.5f).zyxw().rgba32()); -} - -void GSDevice9::ClearRenderTarget(GSTexture* rt, uint32 c) -{ - if (!rt) return; - CComPtr surface; - m_dev->GetRenderTarget(0, &surface); - m_dev->SetRenderTarget(0, *(GSTexture9*)rt); - m_dev->Clear(0, NULL, D3DCLEAR_TARGET, c, 0, 0); - m_dev->SetRenderTarget(0, surface); -} - -void GSDevice9::ClearDepth(GSTexture* t, float c) -{ - if (!t) return; - CComPtr dssurface; - m_dev->GetDepthStencilSurface(&dssurface); - m_dev->SetDepthStencilSurface(*(GSTexture9*)t); - m_dev->Clear(0, NULL, D3DCLEAR_ZBUFFER, 0, c, 0); - m_dev->SetDepthStencilSurface(dssurface); -} - -void GSDevice9::ClearStencil(GSTexture* 
t, uint8 c) -{ - if (!t) return; - CComPtr dssurface; - m_dev->GetDepthStencilSurface(&dssurface); - m_dev->SetDepthStencilSurface(*(GSTexture9*)t); - m_dev->Clear(0, NULL, D3DCLEAR_STENCIL, 0, 0, c); - m_dev->SetDepthStencilSurface(dssurface); -} - -GSTexture* GSDevice9::CreateSurface(int type, int w, int h, bool msaa, int format) -{ - HRESULT hr; - - CComPtr texture; - CComPtr surface; - - switch(type) - { - case GSTexture::RenderTarget: - if(msaa) hr = m_dev->CreateRenderTarget(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); - else hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); - break; - case GSTexture::DepthStencil: - if(msaa) hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); - else hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, D3DMULTISAMPLE_NONE, 0, FALSE, &surface, NULL); - break; - case GSTexture::Texture: - hr = m_dev->CreateTexture(w, h, 1, 0, (D3DFORMAT)format, D3DPOOL_MANAGED, &texture, NULL); - break; - case GSTexture::Offscreen: - hr = m_dev->CreateOffscreenPlainSurface(w, h, (D3DFORMAT)format, D3DPOOL_SYSTEMMEM, &surface, NULL); - break; - } - - GSTexture9* t = NULL; - - if(surface) - { - t = new GSTexture9(surface); - } - - if(texture) - { - t = new GSTexture9(texture); - } - - if(t) - { - switch(type) - { - case GSTexture::RenderTarget: - ClearRenderTarget(t, 0); - break; - case GSTexture::DepthStencil: - ClearDepth(t, 0); - break; - } - } - - return t; -} - -GSTexture* GSDevice9::CreateRenderTarget(int w, int h, bool msaa, int format) -{ - return __super::CreateRenderTarget(w, h, msaa, format ? format : D3DFMT_A8R8G8B8); -} - -GSTexture* GSDevice9::CreateDepthStencil(int w, int h, bool msaa, int format) -{ - return __super::CreateDepthStencil(w, h, msaa, format ? 
format : m_depth_format); -} - -GSTexture* GSDevice9::CreateTexture(int w, int h, int format) -{ - return __super::CreateTexture(w, h, format ? format : D3DFMT_A8R8G8B8); -} - -GSTexture* GSDevice9::CreateOffscreen(int w, int h, int format) -{ - return __super::CreateOffscreen(w, h, format ? format : D3DFMT_A8R8G8B8); -} - -GSTexture* GSDevice9::Resolve(GSTexture* t) -{ - ASSERT(t != NULL && t->IsMSAA()); - - if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) - { - dst->SetScale(t->GetScale()); - - m_dev->StretchRect(*(GSTexture9*)t, NULL, *(GSTexture9*)dst, NULL, D3DTEXF_POINT); - - return dst; - } - - return NULL; -} - -GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format, int ps_shader) -{ - GSTexture* dst = NULL; - - if(format == 0) - { - format = D3DFMT_A8R8G8B8; - } - - if(format != D3DFMT_A8R8G8B8) - { - ASSERT(0); - - return false; - } - - if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) - { - GSVector4 dRect(0, 0, w, h); - - if(GSTexture* src2 = src->IsMSAA() ? 
Resolve(src) : src) - { - StretchRect(src2, sRect, rt, dRect, m_convert.ps[1], NULL, 0); - - if(src2 != src) Recycle(src2); - } - - dst = CreateOffscreen(w, h, format); - - if(dst) - { - m_dev->GetRenderTargetData(*(GSTexture9*)rt, *(GSTexture9*)dst); - } - - Recycle(rt); - } - - return dst; -} - -void GSDevice9::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) -{ - if(!sTex || !dTex) - { - ASSERT(0); - return; - } - - m_dev->StretchRect(*(GSTexture9*)sTex, r, *(GSTexture9*)dTex, r, D3DTEXF_NONE); -} - -void GSDevice9::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[shader], NULL, 0, linear); -} - -void GSDevice9::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, ps_cb_len, &m_convert.bs, linear); -} - -void GSDevice9::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear) -{ - if(!sTex || !dTex) - { - ASSERT(0); - return; - } - - BeginScene(); - - GSVector2i ds = dTex->GetSize(); - - // om - - OMSetDepthStencilState(&m_convert.dss); - OMSetBlendState(bs, 0); - OMSetRenderTargets(dTex, NULL); - - // ia - - float left = dRect.x * 2 / ds.x - 1.0f; - float top = 1.0f - dRect.y * 2 / ds.y; - float right = dRect.z * 2 / ds.x - 1.0f; - float bottom = 1.0f - dRect.w * 2 / ds.y; - - GSVertexPT1 vertices[] = - { - {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)}, - {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)}, - {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)}, - {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)}, - }; - - for(size_t i = 0; i < 
countof(vertices); i++) - { - vertices[i].p.x -= 1.0f / ds.x; - vertices[i].p.y += 1.0f / ds.y; - } - - IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); - IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); - IASetInputLayout(m_convert.il); - - // vs - - VSSetShader(m_convert.vs, NULL, 0); - - // ps - - PSSetSamplerState(linear ? &m_convert.ln : &m_convert.pt); - PSSetShaderResources(sTex, NULL); - PSSetShader(ps, ps_cb, ps_cb_len); - - // - - DrawPrimitive(); - - // - - EndScene(); -} - -void GSDevice9::DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) -{ - ClearRenderTarget(dTex, c); - - if(sTex[1] && !slbg) - { - StretchRect(sTex[1], sRect[1], dTex, dRect[1], m_merge.ps[0], NULL, true); - } - - if(sTex[0]) - { - MergeConstantBuffer cb; - - cb.BGColor = c; - - StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge.ps[mmod ? 1 : 0], (const float*)&cb, 1, &m_merge.bs, true); - } -} - -void GSDevice9::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) -{ - GSVector4 s = GSVector4(dTex->GetSize()); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); - - InterlaceConstantBuffer cb; - - cb.ZrH = GSVector2(0, 1.0f / s.y); - cb.hH = (float)s.y / 2; - - StretchRect(sTex, sRect, dTex, dRect, m_interlace.ps[shader], (const float*)&cb, 1, linear); -} - -void GSDevice9::InitExternalFX() -{ - if (!ExShader_Compiled) - { - try { - std::string config_name(theApp.GetConfig("shaderfx_conf", "shaders/GSdx_FX_Settings.ini")); - std::ifstream fconfig(config_name); - std::stringstream shader; - if (fconfig.good()) - shader << fconfig.rdbuf() << "\n"; - else - fprintf(stderr, "GSdx: External shader config '%s' not loaded.\n", config_name.c_str()); - - std::string shader_name(theApp.GetConfig("shaderfx_glsl", "shaders/GSdx.fx")); - std::ifstream fshader(shader_name); - if (fshader.good()) - { - shader << fshader.rdbuf(); - 
CompileShader(shader.str().c_str(), shader.str().length(), shader_name.c_str(), "ps_main", nullptr, &m_shaderfx.ps); - } - else - { - fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str()); - } - } - catch (GSDXRecoverableError) { - printf("GSdx: failed to compile external post-processing shader. \n"); - } - ExShader_Compiled = true; - } -} - -void GSDevice9::DoExternalFX(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ExternalFXConstantBuffer cb; - - InitExternalFX(); - - cb.xyFrame = GSVector2(s.x, s.y); - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, (const float*)&cb, 2, true); -} - -void GSDevice9::InitFXAA() -{ - if (!FXAA_Compiled) - { - try { - vector shader; - theApp.LoadResource(IDR_FXAA_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "fxaa.fx", "ps_main", nullptr, &m_fxaa.ps); - } - catch (GSDXRecoverableError) { - printf("GSdx: Failed to compile fxaa shader.\n"); - } - FXAA_Compiled = true; - } -} - -void GSDevice9::DoFXAA(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - FXAAConstantBuffer cb; - - InitFXAA(); - - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, (const float*)&cb, 2, true); -} - -void GSDevice9::DoShadeBoost(GSTexture* sTex, GSTexture* dTex) -{ - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ShadeBoostConstantBuffer cb; - - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, (const float*)&cb, 1, true); -} - 
-void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) -{ - const GSVector2i& size = rt->GetSize(); - - if(GSTexture* t = CreateRenderTarget(size.x, size.y, rt->IsMSAA())) - { - // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows - - BeginScene(); - - ClearStencil(ds, 0); - - // om - - OMSetDepthStencilState(&m_date.dss); - OMSetBlendState(&m_date.bs, 0); - OMSetRenderTargets(t, ds); - - // ia - - IASetVertexBuffer(vertices, sizeof(vertices[0]), 4); - IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); - - // vs - - VSSetShader(m_convert.vs, NULL, 0); - IASetInputLayout(m_convert.il); - - // ps - - GSTexture* rt2 = rt->IsMSAA() ? Resolve(rt) : rt; - - PSSetShaderResources(rt2, NULL); - PSSetShader(m_convert.ps[datm ? 2 : 3], NULL, 0); - PSSetSamplerState(&m_convert.pt); - - // - - DrawPrimitive(); - - // - - EndScene(); - - Recycle(t); - - if(rt2 != rt) Recycle(rt2); - } -} - -void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) -{ - void* ptr = NULL; - - if(IAMapVertexBuffer(&ptr, stride, count)) - { - GSVector4i::storent(ptr, vertex, count * stride); - - IAUnmapVertexBuffer(); - } -} - -bool GSDevice9::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) -{ - ASSERT(m_vertex.count == 0); - - if(count * stride > m_vertex.limit * m_vertex.stride) - { - m_vb_old = m_vb; - m_vb = NULL; - - m_vertex.start = 0; - m_vertex.count = 0; - m_vertex.limit = std::max(count * 3 / 2, 10000); - } - - if(m_vb == NULL) - { - HRESULT hr; - - hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); - - if(FAILED(hr)) return false; - } - - uint32 flags = D3DLOCK_NOOVERWRITE; - - if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) - { - m_vertex.start = 0; - - flags = D3DLOCK_DISCARD; - } - - if(FAILED(m_vb->Lock(m_vertex.start * stride, count * stride, 
vertex, flags))) - { - return false; - } - - m_vertex.count = count; - m_vertex.stride = stride; - - return true; -} - -void GSDevice9::IAUnmapVertexBuffer() -{ - m_vb->Unlock(); - - IASetVertexBuffer(m_vb, m_vertex.stride); -} - -void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) -{ - if(m_state.vb != vb || m_state.vb_stride != stride) - { - m_state.vb = vb; - m_state.vb_stride = stride; - - m_dev->SetStreamSource(0, vb, 0, stride); - } -} - -void GSDevice9::IASetIndexBuffer(const void* index, size_t count) -{ - ASSERT(m_index.count == 0); - - if(count > m_index.limit) - { - m_ib_old = m_ib; - m_ib = NULL; - - m_index.count = 0; - m_index.limit = std::max(count * 3 / 2, 11000); - } - - if(m_ib == NULL) - { - HRESULT hr; - - hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL); - - if(FAILED(hr)) return; - } - - uint32 flags = D3DLOCK_NOOVERWRITE; - - if(m_index.start + count > m_index.limit) - { - m_index.start = 0; - - flags = D3DLOCK_DISCARD; - } - - void* ptr = NULL; - - if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags))) - { - memcpy(ptr, index, count * sizeof(uint32)); - - m_ib->Unlock(); - } - - m_index.count = count; - - IASetIndexBuffer(m_ib); -} - -void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib) -{ - if(m_state.ib != ib) - { - m_state.ib = ib; - - m_dev->SetIndices(ib); - } -} - -void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) -{ - if(m_state.layout != layout) - { - m_state.layout = layout; - - m_dev->SetVertexDeclaration(layout); - } -} - -void GSDevice9::IASetPrimitiveTopology(D3DPRIMITIVETYPE topology) -{ - m_state.topology = topology; -} - -void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len) -{ - if(m_state.vs != vs) - { - m_state.vs = vs; - - m_dev->SetVertexShader(vs); - } - - if(vs_cb && vs_cb_len > 0) - { - int size 
= vs_cb_len * sizeof(float) * 4; - - if(m_state.vs_cb_len != vs_cb_len || m_state.vs_cb == NULL || memcmp(m_state.vs_cb, vs_cb, size)) - { - if(m_state.vs_cb == NULL || m_state.vs_cb_len < vs_cb_len) - { - if(m_state.vs_cb) _aligned_free(m_state.vs_cb); - - m_state.vs_cb = (float*)_aligned_malloc(size, 32); - } - - m_state.vs_cb_len = vs_cb_len; - - memcpy(m_state.vs_cb, vs_cb, size); - - m_dev->SetVertexShaderConstantF(0, vs_cb, vs_cb_len); - } - } -} - -void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) -{ - PSSetShaderResource(0, sr0); - PSSetShaderResource(1, sr1); - PSSetShaderResource(2, NULL); -} - -void GSDevice9::PSSetShaderResource(int i, GSTexture* sr) -{ - IDirect3DTexture9* srv = NULL; - - if(sr) srv = *(GSTexture9*)sr; - - if(m_state.ps_srvs[i] != srv) - { - m_state.ps_srvs[i] = srv; - - m_dev->SetTexture(i, srv); - } -} - -void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len) -{ - if(m_state.ps != ps) - { - m_state.ps = ps; - - m_dev->SetPixelShader(ps); - } - - if(ps_cb && ps_cb_len > 0) - { - int size = ps_cb_len * sizeof(float) * 4; - - if(m_state.ps_cb_len != ps_cb_len || m_state.ps_cb == NULL || memcmp(m_state.ps_cb, ps_cb, size)) - { - if(m_state.ps_cb == NULL || m_state.ps_cb_len < ps_cb_len) - { - if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - - m_state.ps_cb = (float*)_aligned_malloc(size, 32); - } - - m_state.ps_cb_len = ps_cb_len; - - memcpy(m_state.ps_cb, ps_cb, size); - - m_dev->SetPixelShaderConstantF(0, ps_cb, ps_cb_len); - } - } -} - -void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss) -{ - if(ss && m_state.ps_ss != ss) - { - m_state.ps_ss = ss; - - m_dev->SetSamplerState(0, D3DSAMP_MINFILTER, ss->FilterMin[0]); - m_dev->SetSamplerState(0, D3DSAMP_MAGFILTER, ss->FilterMag[0]); - m_dev->SetSamplerState(0, D3DSAMP_MIPFILTER, ss->FilterMip[0]); - m_dev->SetSamplerState(0, D3DSAMP_ADDRESSU, ss->AddressU); - m_dev->SetSamplerState(0, D3DSAMP_ADDRESSV, ss->AddressV); - 
m_dev->SetSamplerState(0, D3DSAMP_ADDRESSW, ss->AddressW); - m_dev->SetSamplerState(0, D3DSAMP_MAXANISOTROPY, ss->MaxAnisotropy); - m_dev->SetSamplerState(0, D3DSAMP_MAXMIPLEVEL, ss->MaxLOD); - - m_dev->SetSamplerState(1, D3DSAMP_MINFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(1, D3DSAMP_MAGFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(1, D3DSAMP_MIPFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(1, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(1, D3DSAMP_MAXANISOTROPY, ss->MaxAnisotropy); - m_dev->SetSamplerState(1, D3DSAMP_MAXMIPLEVEL, ss->MaxLOD); - - m_dev->SetSamplerState(2, D3DSAMP_MINFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(2, D3DSAMP_MAGFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(2, D3DSAMP_MIPFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(2, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(2, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(2, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(2, D3DSAMP_MAXANISOTROPY, ss->MaxAnisotropy); - m_dev->SetSamplerState(2, D3DSAMP_MAXMIPLEVEL, ss->MaxLOD); - - m_dev->SetSamplerState(3, D3DSAMP_MINFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(3, D3DSAMP_MAGFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(3, D3DSAMP_MIPFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(3, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP); - m_dev->SetSamplerState(3, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP); - m_dev->SetSamplerState(3, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(3, D3DSAMP_MAXANISOTROPY, ss->MaxAnisotropy); - m_dev->SetSamplerState(3, D3DSAMP_MAXMIPLEVEL, ss->MaxLOD); - - m_dev->SetSamplerState(4, D3DSAMP_MINFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(4, D3DSAMP_MAGFILTER, ss->Anisotropic[1]); - m_dev->SetSamplerState(4, D3DSAMP_MIPFILTER, 
ss->Anisotropic[1]); - m_dev->SetSamplerState(4, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP); - m_dev->SetSamplerState(4, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP); - m_dev->SetSamplerState(4, D3DSAMP_ADDRESSW, D3DTADDRESS_CLAMP); - m_dev->SetSamplerState(4, D3DSAMP_MAXANISOTROPY, ss->MaxAnisotropy); - m_dev->SetSamplerState(4, D3DSAMP_MAXMIPLEVEL, ss->MaxLOD); - } -} - -void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss) -{ - if(m_state.dss != dss) - { - m_state.dss = dss; - - m_dev->SetRenderState(D3DRS_ZENABLE, dss->DepthEnable); - m_dev->SetRenderState(D3DRS_ZWRITEENABLE, dss->DepthWriteMask); - - if(dss->DepthEnable) - { - m_dev->SetRenderState(D3DRS_ZFUNC, dss->DepthFunc); - } - - m_dev->SetRenderState(D3DRS_STENCILENABLE, dss->StencilEnable); - - if(dss->StencilEnable) - { - m_dev->SetRenderState(D3DRS_STENCILMASK, dss->StencilReadMask); - m_dev->SetRenderState(D3DRS_STENCILWRITEMASK, dss->StencilWriteMask); - m_dev->SetRenderState(D3DRS_STENCILFUNC, dss->StencilFunc); - m_dev->SetRenderState(D3DRS_STENCILPASS, dss->StencilPassOp); - m_dev->SetRenderState(D3DRS_STENCILFAIL, dss->StencilFailOp); - m_dev->SetRenderState(D3DRS_STENCILZFAIL, dss->StencilDepthFailOp); - m_dev->SetRenderState(D3DRS_STENCILREF, dss->StencilRef); - } - } -} - -void GSDevice9::OMSetBlendState(Direct3DBlendState9* bs, uint32 bf) -{ - if(m_state.bs != bs || m_state.bf != bf) - { - m_state.bs = bs; - m_state.bf = bf; - - m_dev->SetRenderState(D3DRS_ALPHABLENDENABLE, bs->BlendEnable); - - if(bs->BlendEnable) - { - m_dev->SetRenderState(D3DRS_BLENDOP, bs->BlendOp); - m_dev->SetRenderState(D3DRS_SRCBLEND, bs->SrcBlend); - m_dev->SetRenderState(D3DRS_DESTBLEND, bs->DestBlend); - m_dev->SetRenderState(D3DRS_SEPARATEALPHABLENDENABLE, TRUE); - m_dev->SetRenderState(D3DRS_BLENDOPALPHA, bs->BlendOpAlpha); - m_dev->SetRenderState(D3DRS_SRCBLENDALPHA, bs->SrcBlendAlpha); - m_dev->SetRenderState(D3DRS_DESTBLENDALPHA, bs->DestBlendAlpha); - m_dev->SetRenderState(D3DRS_BLENDFACTOR, bf); - } - - 
m_dev->SetRenderState(D3DRS_COLORWRITEENABLE, bs->RenderTargetWriteMask); - } -} - -void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) -{ - IDirect3DSurface9* rtv = NULL; - IDirect3DSurface9* dsv = NULL; - - if(rt) rtv = *(GSTexture9*)rt; - if(ds) dsv = *(GSTexture9*)ds; - - if(m_state.rtv != rtv) - { - m_state.rtv = rtv; - - m_dev->SetRenderTarget(0, rtv); - } - - if(m_state.dsv != dsv) - { - m_state.dsv = dsv; - - m_dev->SetDepthStencilSurface(dsv); - } - - GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); - - if(!m_state.scissor.eq(r)) - { - m_state.scissor = r; - - m_dev->SetScissorRect(r); - } -} - -void GSDevice9::CompileShader(const char *source, size_t size, const char *filename, const string& entry, const D3D_SHADER_MACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il) -{ - vector m; - - PrepareShaderMacro(m, macro); - - HRESULT hr; - - CComPtr shader, error; - - hr = s_pD3DCompile(source, size, nullptr, &m[0], nullptr, entry.c_str(), m_shader.vs.c_str(), 0, 0, &shader, &error); - - if(SUCCEEDED(hr)) - { - hr = m_dev->CreateVertexShader((DWORD*)shader->GetBufferPointer(), vs); - } - else if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - ASSERT(SUCCEEDED(hr)); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } - - hr = m_dev->CreateVertexDeclaration(layout, il); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} - -void GSDevice9::CompileShader(const char *source, size_t size, const char *filename, const string& entry, const D3D_SHADER_MACRO* macro, IDirect3DPixelShader9** ps) -{ - uint32 flags = 0; - - if(m_shader.level >= D3D_FEATURE_LEVEL_9_3) - { - flags |= D3DCOMPILE_AVOID_FLOW_CONTROL; - } - else - { - flags |= D3DCOMPILE_SKIP_VALIDATION; - } - - vector m; - - PrepareShaderMacro(m, macro); - - HRESULT hr; - - CComPtr shader, error; - hr = s_pD3DCompile(source, size, filename, 
&m[0], nullptr, entry.c_str(), m_shader.ps.c_str(), flags, 0, &shader, &error); - - if(SUCCEEDED(hr)) - { - hr = m_dev->CreatePixelShader((DWORD*)shader->GetBufferPointer(), ps); - } - else if(error) - { - printf("%s\n", (const char*)error->GetBufferPointer()); - } - - ASSERT(SUCCEEDED(hr)); - - if(FAILED(hr)) - { - throw GSDXRecoverableError(); - } -} diff --git a/plugins/GSdx_legacy/GSDevice9.h b/plugins/GSdx_legacy/GSDevice9.h deleted file mode 100644 index 6bb1477e34..0000000000 --- a/plugins/GSdx_legacy/GSDevice9.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDeviceDX.h" -#include "GSTexture9.h" - -struct Direct3DSamplerState9 -{ - D3DTEXTUREFILTERTYPE FilterMin[2]; - D3DTEXTUREFILTERTYPE FilterMag[2]; - D3DTEXTUREFILTERTYPE FilterMip[2]; - D3DTEXTUREFILTERTYPE Anisotropic[2]; - D3DTEXTUREADDRESS AddressU; - D3DTEXTUREADDRESS AddressV; - D3DTEXTUREADDRESS AddressW; - DWORD MaxAnisotropy; - DWORD MaxLOD; -}; - -struct Direct3DDepthStencilState9 -{ - BOOL DepthEnable; - BOOL DepthWriteMask; - D3DCMPFUNC DepthFunc; - BOOL StencilEnable; - UINT8 StencilReadMask; - UINT8 StencilWriteMask; - D3DSTENCILOP StencilFailOp; - D3DSTENCILOP StencilDepthFailOp; - D3DSTENCILOP StencilPassOp; - D3DCMPFUNC StencilFunc; - uint32 StencilRef; -}; - -struct Direct3DBlendState9 -{ - BOOL BlendEnable; - D3DBLEND SrcBlend; - D3DBLEND DestBlend; - D3DBLENDOP BlendOp; - D3DBLEND SrcBlendAlpha; - D3DBLEND DestBlendAlpha; - D3DBLENDOP BlendOpAlpha; - UINT8 RenderTargetWriteMask; -}; - -struct GSVertexShader9 -{ - CComPtr vs; - CComPtr il; -}; - -class GSDevice9 : public GSDeviceDX -{ - GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format); - - void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c); - void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0); - void DoFXAA(GSTexture* sTex, GSTexture* dTex); - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex); - void DoExternalFX(GSTexture* sTex, GSTexture* dTex); - - void InitExternalFX(); - void InitFXAA(); - - // - - D3DCAPS9 m_d3dcaps; - D3DPRESENT_PARAMETERS m_pp; - CComPtr m_d3d; - CComPtr m_dev; - CComPtr m_swapchain; - CComPtr m_vb; - CComPtr m_vb_old; - CComPtr m_ib; - CComPtr m_ib_old; - bool m_lost; - D3DFORMAT m_depth_format; - - struct - { - IDirect3DVertexBuffer9* vb; - size_t vb_stride; - IDirect3DIndexBuffer9* ib; - IDirect3DVertexDeclaration9* layout; - D3DPRIMITIVETYPE 
topology; - IDirect3DVertexShader9* vs; - float* vs_cb; - int vs_cb_len; - IDirect3DTexture9* ps_srvs[3]; - IDirect3DPixelShader9* ps; - float* ps_cb; - int ps_cb_len; - Direct3DSamplerState9* ps_ss; - GSVector4i scissor; - Direct3DDepthStencilState9* dss; - Direct3DBlendState9* bs; - uint32 bf; - IDirect3DSurface9* rtv; - IDirect3DSurface9* dsv; - } m_state; - -public: // TODO - - bool FXAA_Compiled; - bool ExShader_Compiled; - - struct - { - CComPtr il; - CComPtr vs; - CComPtr ps[10]; - Direct3DSamplerState9 ln; - Direct3DSamplerState9 pt; - Direct3DDepthStencilState9 dss; - Direct3DBlendState9 bs; - } m_convert; - - struct - { - CComPtr ps[2]; - Direct3DBlendState9 bs; - } m_merge; - - struct - { - CComPtr ps[4]; - } m_interlace; - - struct - { - CComPtr ps; - } m_shaderfx; - - struct - { - CComPtr ps; - } m_fxaa; - - struct - { - CComPtr ps; - } m_shadeboost; - - struct - { - Direct3DDepthStencilState9 dss; - Direct3DBlendState9 bs; - } m_date; - - void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm); - - // Shaders... 
- - hash_map m_vs; - hash_map > m_ps; - hash_map m_ps_ss; - hash_map m_om_dss; - hash_map m_om_bs; - hash_map m_mskfix; - - GSTexture* CreateMskFix(uint32 size, uint32 msk, uint32 fix); - -public: - GSDevice9(); - virtual ~GSDevice9(); - - bool Create(GSWnd* wnd); - bool Reset(int w, int h); - bool IsLost(bool update); - void Flip(); - - void SetVSync(bool enable); - - void BeginScene(); - void DrawPrimitive(); - void DrawIndexedPrimitive(); - void EndScene(); - - void ClearRenderTarget(GSTexture* t, const GSVector4& c); - void ClearRenderTarget(GSTexture* t, uint32 c); - void ClearDepth(GSTexture* t, float c); - void ClearStencil(GSTexture* t, uint8 c); - - GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); - GSTexture* CreateTexture(int w, int h, int format = 0); - GSTexture* CreateOffscreen(int w, int h, int format = 0); - - GSTexture* Resolve(GSTexture* t); - - GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0); - - void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r); - - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); - - void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); - bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count); - void IAUnmapVertexBuffer(); - void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); - void IASetIndexBuffer(const void* index, size_t count); 
- void IASetIndexBuffer(IDirect3DIndexBuffer9* ib); - void IASetInputLayout(IDirect3DVertexDeclaration9* layout); - void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology); - void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len); - void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1); - void PSSetShaderResource(int i, GSTexture* sr); - void PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len); - void PSSetSamplerState(Direct3DSamplerState9* ss); - void OMSetDepthStencilState(Direct3DDepthStencilState9* dss); - void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf); - void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); - - IDirect3DDevice9* operator->() {return m_dev;} - operator IDirect3DDevice9*() {return m_dev;} - - void CompileShader(const char *source, size_t size, const char *filename, const string& entry, const D3D_SHADER_MACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il); - void CompileShader(const char *source, size_t size, const char *filename, const string& entry, const D3D_SHADER_MACRO* macro, IDirect3DPixelShader9** ps); - - void SetupVS(VSSelector sel, const VSConstantBuffer* cb); - void SetupGS(GSSelector sel) {} - void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); - void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); - - bool HasStencil() { return m_depth_format == D3DFMT_D24S8; } - bool HasDepth32() { return m_depth_format != D3DFMT_D24S8; } - - static uint32 GetMaxDepth(uint32 msaaCount = 0, std::string adapter_id = ""); - static void ForceValidMsaaConfig(); - -}; - diff --git a/plugins/GSdx_legacy/GSDeviceDX.cpp b/plugins/GSdx_legacy/GSDeviceDX.cpp deleted file mode 100644 index 2844f37e40..0000000000 --- a/plugins/GSdx_legacy/GSDeviceDX.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * 
http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSDeviceDX.h" -#include - -HMODULE GSDeviceDX::s_d3d_compiler_dll = nullptr; -decltype(&D3DCompile) GSDeviceDX::s_pD3DCompile = nullptr; -bool GSDeviceDX::s_old_d3d_compiler_dll; - -GSDeviceDX::GSDeviceDX() -{ - m_msaa = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_MSAA", 0) : 0; - - m_msaa_desc.Count = 1; - m_msaa_desc.Quality = 0; -} - -GSDeviceDX::~GSDeviceDX() -{ -} - -bool GSDeviceDX::LoadD3DCompiler() -{ - // Windows 8.1 and later come with the latest d3dcompiler_47.dll, but - // Windows 7 devs might also have the dll available for use (which will - // have to be placed in the application directory) - s_d3d_compiler_dll = LoadLibraryEx("d3dcompiler_47.dll", nullptr, LOAD_LIBRARY_SEARCH_APPLICATION_DIR | LOAD_LIBRARY_SEARCH_SYSTEM32); - - // Windows Vista and 7 can use the older version. If the previous LoadLibrary - // call fails on Windows 8.1 and later, then the user's system is likely - // broken. 
- if (s_d3d_compiler_dll) - { - s_old_d3d_compiler_dll = false; - } - else - { - if (!IsWindows8Point1OrGreater()) - // Use LoadLibrary instead of LoadLibraryEx, some Windows 7 systems - // have issues with it. - s_d3d_compiler_dll = LoadLibrary("D3DCompiler_43.dll"); - - if (s_d3d_compiler_dll == nullptr) - return false; - - s_old_d3d_compiler_dll = true; - } - - s_pD3DCompile = reinterpret_cast(GetProcAddress(s_d3d_compiler_dll, "D3DCompile")); - if (s_pD3DCompile) - return true; - - FreeLibrary(s_d3d_compiler_dll); - s_d3d_compiler_dll = nullptr; - return false; -} - -void GSDeviceDX::FreeD3DCompiler() -{ - s_pD3DCompile = nullptr; - if (s_d3d_compiler_dll) - FreeLibrary(s_d3d_compiler_dll); - s_d3d_compiler_dll = nullptr; -} - -GSTexture* GSDeviceDX::FetchSurface(int type, int w, int h, bool msaa, int format) -{ - if(m_msaa < 2) - { - msaa = false; - } - - return __super::FetchSurface(type, w, h, msaa, format); -} - -bool GSDeviceDX::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode) -{ - m_shader.level = level; - - switch(level) - { - case D3D_FEATURE_LEVEL_9_1: - case D3D_FEATURE_LEVEL_9_2: - m_shader.model = "0x200"; - m_shader.vs = compat_mode ? "vs_4_0_level_9_1" : "vs_2_0"; - m_shader.ps = compat_mode ? "ps_4_0_level_9_1" : "ps_2_0"; - break; - case D3D_FEATURE_LEVEL_9_3: - m_shader.model = "0x300"; - m_shader.vs = compat_mode ? "vs_4_0_level_9_3" : "vs_3_0"; - m_shader.ps = compat_mode ? 
"ps_4_0_level_9_3" : "ps_3_0"; - break; - case D3D_FEATURE_LEVEL_10_0: - m_shader.model = "0x400"; - m_shader.vs = "vs_4_0"; - m_shader.gs = "gs_4_0"; - m_shader.ps = "ps_4_0"; - m_shader.cs = "cs_4_0"; - break; - case D3D_FEATURE_LEVEL_10_1: - m_shader.model = "0x401"; - m_shader.vs = "vs_4_1"; - m_shader.gs = "gs_4_1"; - m_shader.ps = "ps_4_1"; - m_shader.cs = "cs_4_1"; - break; - case D3D_FEATURE_LEVEL_11_0: - m_shader.model = "0x500"; - m_shader.vs = "vs_5_0"; - m_shader.gs = "gs_5_0"; - m_shader.ps = "ps_5_0"; - m_shader.cs = "cs_5_0"; - break; - default: - ASSERT(0); - return false; - } - - return true; -} - -// (A - B) * C + D -// A: Cs/Cd/0 -// B: Cs/Cd/0 -// C: As/Ad/FIX -// D: Cs/Cd/0 - -// bogus: 0100, 0110, 0120, 0200, 0210, 0220, 1001, 1011, 1021 -// tricky: 1201, 1211, 1221 - -// Source.rgb = float3(1, 1, 1); -// 1201 Cd*(1 + As) => Source * Dest color + Dest * Source alpha -// 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha -// 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor - -const GSDeviceDX::D3D9Blend GSDeviceDX::m_blendMapD3D9[3*3*3*3] = -{ - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 0000: (Cs - Cs)*As + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 0001: (Cs - Cs)*As + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 0002: (Cs - Cs)*As + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 0010: (Cs - Cs)*Ad + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 0011: (Cs - Cs)*Ad + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 0012: (Cs - Cs)*Ad + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 0020: (Cs - Cs)*F + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 0021: (Cs - Cs)*F + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 0022: (Cs - Cs)*F + 0 ==> 0 - {1, D3DBLENDOP_SUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_SRCALPHA}, //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As - {0, 
D3DBLENDOP_ADD, D3DBLEND_SRCALPHA, D3DBLEND_INVSRCALPHA}, // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_SRCALPHA}, // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As - {1, D3DBLENDOP_SUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_DESTALPHA}, //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad - {0, D3DBLENDOP_ADD, D3DBLEND_DESTALPHA, D3DBLEND_INVDESTALPHA}, // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_DESTALPHA}, // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad - {1, D3DBLENDOP_SUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_BLENDFACTOR}, //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F - {0, D3DBLENDOP_ADD, D3DBLEND_BLENDFACTOR, D3DBLEND_INVBLENDFACTOR}, // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_BLENDFACTOR}, // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F - {1, D3DBLENDOP_ADD, D3DBLEND_SRCALPHA, D3DBLEND_ZERO}, //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) - {0, D3DBLENDOP_ADD, D3DBLEND_SRCALPHA, D3DBLEND_ONE}, // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd - {0, D3DBLENDOP_ADD, D3DBLEND_SRCALPHA, D3DBLEND_ZERO}, // 0202: (Cs - 0)*As + 0 ==> Cs*As - {1, D3DBLENDOP_ADD, D3DBLEND_DESTALPHA, D3DBLEND_ZERO}, //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) - {0, D3DBLENDOP_ADD, D3DBLEND_DESTALPHA, D3DBLEND_ONE}, // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd - {0, D3DBLENDOP_ADD, D3DBLEND_DESTALPHA, D3DBLEND_ZERO}, // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad - {1, D3DBLENDOP_ADD, D3DBLEND_BLENDFACTOR, D3DBLEND_ZERO}, //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - {0, D3DBLENDOP_ADD, D3DBLEND_BLENDFACTOR, D3DBLEND_ONE}, // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd - {0, D3DBLENDOP_ADD, D3DBLEND_BLENDFACTOR, D3DBLEND_ZERO}, // 0222: (Cs - 0)*F + 0 ==> Cs*F - {0, D3DBLENDOP_ADD, D3DBLEND_INVSRCALPHA, D3DBLEND_SRCALPHA}, // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) - {1, D3DBLENDOP_REVSUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_SRCALPHA}, //*1001: (Cd - Cs)*As + Cd 
==> Cd*(As + 1) - Cs*As - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_SRCALPHA}, // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As - {0, D3DBLENDOP_ADD, D3DBLEND_INVDESTALPHA, D3DBLEND_DESTALPHA}, // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) - {1, D3DBLENDOP_REVSUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_DESTALPHA}, //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_DESTALPHA}, // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad - {0, D3DBLENDOP_ADD, D3DBLEND_INVBLENDFACTOR, D3DBLEND_BLENDFACTOR}, // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) - {1, D3DBLENDOP_REVSUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_BLENDFACTOR},//*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_BLENDFACTOR},// 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 1100: (Cd - Cd)*As + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 1101: (Cd - Cd)*As + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 1102: (Cd - Cd)*As + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 1110: (Cd - Cd)*Ad + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 1111: (Cd - Cd)*Ad + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 1112: (Cd - Cd)*Ad + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 1120: (Cd - Cd)*F + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 1121: (Cd - Cd)*F + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 1122: (Cd - Cd)*F + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_SRCALPHA}, // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As - {2, D3DBLENDOP_ADD, D3DBLEND_DESTCOLOR, D3DBLEND_SRCALPHA}, //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background glow effect - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_SRCALPHA}, // 1202: (Cd - 0)*As + 0 ==> Cd*As - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, 
D3DBLEND_DESTALPHA}, // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad - {2, D3DBLENDOP_ADD, D3DBLEND_DESTCOLOR, D3DBLEND_DESTALPHA}, //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_DESTALPHA}, // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_BLENDFACTOR}, // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F - {2, D3DBLENDOP_ADD, D3DBLEND_DESTCOLOR, D3DBLEND_BLENDFACTOR}, //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_BLENDFACTOR}, // 1222: (Cd - 0)*F + 0 ==> Cd*F - {0, D3DBLENDOP_ADD, D3DBLEND_INVSRCALPHA, D3DBLEND_ZERO}, // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_ONE}, // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_SRCALPHA, D3DBLEND_ZERO}, // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As - {0, D3DBLENDOP_ADD, D3DBLEND_INVDESTALPHA, D3DBLEND_ZERO}, // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_ONE}, // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_DESTALPHA, D3DBLEND_ZERO}, // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad - {0, D3DBLENDOP_ADD, D3DBLEND_INVBLENDFACTOR, D3DBLEND_ZERO}, // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_ONE}, // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F - {0, D3DBLENDOP_REVSUBTRACT, D3DBLEND_BLENDFACTOR, D3DBLEND_ZERO}, // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ONE, D3DBLEND_SRCALPHA}, // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_INVSRCALPHA}, // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ZERO, D3DBLEND_SRCALPHA}, // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ONE, D3DBLEND_DESTALPHA}, // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_INVDESTALPHA}, // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 
- Ad) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ONE, D3DBLEND_DESTALPHA}, // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ONE, D3DBLEND_BLENDFACTOR}, // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_INVBLENDFACTOR}, // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) - {0, D3DBLENDOP_SUBTRACT, D3DBLEND_ONE, D3DBLEND_BLENDFACTOR}, // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 2200: (0 - 0)*As + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 2201: (0 - 0)*As + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 2202: (0 - 0)*As + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 2210: (0 - 0)*Ad + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 2211: (0 - 0)*Ad + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 2212: (0 - 0)*Ad + 0 ==> 0 - {0, D3DBLENDOP_ADD, D3DBLEND_ONE, D3DBLEND_ZERO}, // 2220: (0 - 0)*F + Cs ==> Cs - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ONE}, // 2221: (0 - 0)*F + Cd ==> Cd - {0, D3DBLENDOP_ADD, D3DBLEND_ZERO, D3DBLEND_ZERO}, // 2222: (0 - 0)*F + 0 ==> 0 -}; - diff --git a/plugins/GSdx_legacy/GSDeviceDX.h b/plugins/GSdx_legacy/GSDeviceDX.h deleted file mode 100644 index cdf03312ad..0000000000 --- a/plugins/GSdx_legacy/GSDeviceDX.h +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSVector.h" -#include "GSDevice.h" -#include "GSAlignedClass.h" - -class GSDeviceDX : public GSDevice -{ -public: - #pragma pack(push, 1) - - __aligned(struct, 32) VSConstantBuffer - { - GSVector4 VertexScale; - GSVector4 VertexOffset; - GSVector4 TextureScale; - - struct VSConstantBuffer() - { - VertexScale = GSVector4::zero(); - VertexOffset = GSVector4::zero(); - TextureScale = GSVector4::zero(); - } - - __forceinline bool Update(const VSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue()) - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - - return true; - } - - return false; - } - }; - - struct VSSelector - { - union - { - struct - { - uint32 bppz:2; - uint32 tme:1; - uint32 fst:1; - uint32 logz:1; - uint32 rtcopy:1; - }; - - uint32 key; - }; - - operator uint32() {return key & 0xff;} - - VSSelector() : key(0) {} - }; - - __aligned(struct, 32) PSConstantBuffer - { - GSVector4 FogColor_AREF; - GSVector4 HalfTexel; - GSVector4 WH; - GSVector4 MinMax; - GSVector4 MinF_TA; - GSVector4i MskFix; - - GSVector4 TC_OffsetHack; - - struct PSConstantBuffer() - { - FogColor_AREF = GSVector4::zero(); - HalfTexel = GSVector4::zero(); - WH = GSVector4::zero(); - MinMax = GSVector4::zero(); - MinF_TA = GSVector4::zero(); - MskFix = GSVector4i::zero(); - } - - __forceinline bool Update(const PSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - GSVector4i b3 = b[3]; - 
GSVector4i b4 = b[4]; - GSVector4i b5 = b[5]; - - if(!((a[0] == b0) /*& (a[1] == b1)*/ & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) // if WH matches HalfTexel does too - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - a[3] = b3; - a[4] = b4; - a[5] = b5; - - return true; - } - - return false; - } - }; - - struct GSSelector - { - union - { - struct - { - uint32 iip:1; - uint32 prim:2; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x7;} - - GSSelector() : key(0) {} - }; - - struct PSSelector - { - union - { - struct - { - uint32 fst:1; - uint32 wms:2; - uint32 wmt:2; - uint32 fmt:3; - uint32 aem:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 atst:3; - uint32 fog:1; - uint32 clr1:1; - uint32 fba:1; - uint32 aout:1; - uint32 rt:1; - uint32 ltf:1; - uint32 colclip:2; - uint32 date:2; - uint32 spritehack:1; - uint32 tcoffsethack:1; - uint32 point_sampler:1; - uint32 shuffle:1; - uint32 read_ba:1; - }; - - uint32 key; - }; - - operator uint32() {return key & 0xfffffff;} - - PSSelector() : key(0) {} - }; - - struct PSSamplerSelector - { - union - { - struct - { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x7;} - - PSSamplerSelector() : key(0) {} - }; - - struct OMDepthStencilSelector - { - union - { - struct - { - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - uint32 fba:1; - uint32 alpha_stencil:1; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x3f;} - - OMDepthStencilSelector() : key(0) {} - }; - - struct OMBlendSelector - { - union - { - struct - { - uint32 abe:1; - uint32 a:2; - uint32 b:2; - uint32 c:2; - uint32 d:2; - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; - uint32 negative:1; - }; - - struct - { - uint32 _pad:1; - uint32 abcd:8; - uint32 wrgba:4; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x3fff;} - - OMBlendSelector() : key(0) {} - - bool IsCLR1() const - { - return (key & 0x19f) == 0x93; // abe == 1 && a == 1 
&& b == 2 && d == 1 - } - }; - - struct D3D9Blend {int bogus, op, src, dst;}; - static const D3D9Blend m_blendMapD3D9[3*3*3*3]; - - #pragma pack(pop) - -protected: - struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps, cs;} m_shader; - uint32 m_msaa; - DXGI_SAMPLE_DESC m_msaa_desc; - - static HMODULE s_d3d_compiler_dll; - static decltype(&D3DCompile) s_pD3DCompile; - // Older version doesn't support D3D_COMPILE_STANDARD_FILE_INCLUDE, which - // could be useful for external shaders. - static bool s_old_d3d_compiler_dll; - - GSTexture* FetchSurface(int type, int w, int h, bool msaa, int format); - -public: - GSDeviceDX(); - virtual ~GSDeviceDX(); - - bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); - void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;} - - virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0; - virtual void SetupGS(GSSelector sel) = 0; - virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0; - virtual void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) = 0; - - virtual void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) = 0; - - virtual bool HasStencil() = 0; - virtual bool HasDepth32() = 0; - - static bool LoadD3DCompiler(); - static void FreeD3DCompiler(); - - template void PrepareShaderMacro(vector& dst, const T* src) - { - dst.clear(); - - while(src && src->Definition && src->Name) - { - dst.push_back(*src++); - } - - T m; - - m.Name = "SHADER_MODEL"; - m.Definition = m_shader.model.c_str(); - - dst.push_back(m); - - m.Name = NULL; - m.Definition = NULL; - - dst.push_back(m); - } -}; - diff --git a/plugins/GSdx_legacy/GSDeviceNull.cpp b/plugins/GSdx_legacy/GSDeviceNull.cpp deleted file mode 100644 index d1b5d4cb41..0000000000 --- a/plugins/GSdx_legacy/GSDeviceNull.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is 
free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDeviceNull.h" - -bool GSDeviceNull::Create(GSWnd* wnd) -{ - if(!GSDevice::Create(wnd)) - return false; - - Reset(1, 1); - - return true; -} - -bool GSDeviceNull::Reset(int w, int h) -{ - if(!GSDevice::Reset(w, h)) - return false; - - return true; -} - -GSTexture* GSDeviceNull::CreateSurface(int type, int w, int h, bool msaa, int format) -{ - return new GSTextureNull(type, w, h, format); -} - diff --git a/plugins/GSdx_legacy/GSDeviceNull.h b/plugins/GSdx_legacy/GSDeviceNull.h deleted file mode 100644 index c61312ab1a..0000000000 --- a/plugins/GSdx_legacy/GSDeviceNull.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDevice.h" -#include "GSTextureNull.h" - -class GSDeviceNull : public GSDevice -{ -private: - GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format); - - void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) {} - void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0) {} - -public: - GSDeviceNull() {} - - bool Create(GSWnd* wnd); - bool Reset(int w, int h); -}; - diff --git a/plugins/GSdx_legacy/GSDeviceOGL.cpp b/plugins/GSdx_legacy/GSDeviceOGL.cpp deleted file mode 100644 index 7af10383d9..0000000000 --- a/plugins/GSdx_legacy/GSDeviceOGL.cpp +++ /dev/null @@ -1,1697 +0,0 @@ -/* - * Copyright (C) 2011-2014 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDeviceOGL.h" -#include "GLState.h" -#include - -#include "res/glsl_source.h" - -//#define ONLY_LINES - -// TODO port those value into PerfMon API -#ifdef ENABLE_OGL_DEBUG_MEM_BW -uint64 g_real_texture_upload_byte = 0; -uint64 g_vertex_upload_byte = 0; -uint64 g_uniform_upload_byte = 0; -#endif - -static const uint32 g_merge_cb_index = 10; -static const uint32 g_interlace_cb_index = 11; -static const uint32 g_shadeboost_cb_index = 12; -static const uint32 g_fx_cb_index = 14; -static const uint32 g_convert_index = 15; - -bool GSDeviceOGL::m_debug_gl_call = false; -int GSDeviceOGL::s_n = 0; -FILE* GSDeviceOGL::m_debug_gl_file = NULL; - -GSDeviceOGL::GSDeviceOGL() - : m_msaa(0) - , m_window(NULL) - , m_fbo(0) - , m_fbo_read(0) - , m_va(NULL) - , m_apitrace(0) - , m_palette_ss(0) - , m_vs_cb(NULL) - , m_ps_cb(NULL) - , m_shader(NULL) -{ - memset(&m_merge_obj, 0, sizeof(m_merge_obj)); - memset(&m_interlace, 0, sizeof(m_interlace)); - memset(&m_convert, 0, sizeof(m_convert)); - memset(&m_fxaa, 0, sizeof(m_fxaa)); - memset(&m_shaderfx, 0, sizeof(m_shaderfx)); - memset(&m_date, 0, sizeof(m_date)); - memset(&m_shadeboost, 0, sizeof(m_shadeboost)); - memset(&m_om_dss, 0, sizeof(m_om_dss)); - GLState::Clear(); - - // Reset the debug file - #ifdef ENABLE_OGL_DEBUG - m_debug_gl_file = fopen("GSdx_opengl_debug.txt","w"); - #endif - - m_debug_gl_call = theApp.GetConfig("debug_opengl", 0); -} - -GSDeviceOGL::~GSDeviceOGL() -{ - if (m_debug_gl_file) { - fclose(m_debug_gl_file); - m_debug_gl_file = NULL; - } - - // If the create function wasn't called nothing to do. 
- if (m_shader == NULL) - return; - - GL_PUSH("GSDeviceOGL destructor"); - - // Clean vertex buffer state - delete (m_va); - - // Clean m_merge_obj - for (size_t i = 0; i < countof(m_merge_obj.ps); i++) - m_shader->Delete(m_merge_obj.ps[i]); - delete (m_merge_obj.cb); - - // Clean m_interlace - for (size_t i = 0; i < countof(m_interlace.ps); i++) - m_shader->Delete(m_interlace.ps[i]); - delete (m_interlace.cb); - - // Clean m_convert - m_shader->Delete(m_convert.vs); - for (size_t i = 0; i < countof(m_convert.ps); i++) - m_shader->Delete(m_convert.ps[i]); - delete m_convert.dss; - delete m_convert.dss_write; - delete m_convert.cb; - - // Clean m_fxaa - delete m_fxaa.cb; - m_shader->Delete(m_fxaa.ps); - - // Clean m_shaderfx - delete m_shaderfx.cb; - m_shader->Delete(m_shaderfx.ps); - - // Clean m_date - delete m_date.dss; - - // Clean shadeboost - delete m_shadeboost.cb; - m_shader->Delete(m_shadeboost.ps); - - - // Clean various opengl allocation - glDeleteFramebuffers(1, &m_fbo); - glDeleteFramebuffers(1, &m_fbo_read); - - // Delete HW FX - delete m_vs_cb; - delete m_ps_cb; - glDeleteSamplers(1, &m_palette_ss); - m_shader->Delete(m_apitrace); - - for (uint32 key = 0; key < countof(m_vs); key++) m_shader->Delete(m_vs[key]); - for (uint32 key = 0; key < countof(m_gs); key++) m_shader->Delete(m_gs[key]); - for (auto it = m_ps.begin(); it != m_ps.end() ; it++) m_shader->Delete(it->second); - - m_ps.clear(); - - glDeleteSamplers(countof(m_ps_ss), m_ps_ss); - - for (uint32 key = 0; key < countof(m_om_dss); key++) delete m_om_dss[key]; - - PboPool::Destroy(); - - // Must be done after the destruction of all shader/program objects - delete m_shader; - m_shader = NULL; - - GL_POP(); -} - -GSTexture* GSDeviceOGL::CreateSurface(int type, int w, int h, bool msaa, int fmt) -{ - GL_PUSH("Create surface"); - - // A wrapper to call GSTextureOGL, with the different kind of parameter - GSTextureOGL* t = NULL; - t = new GSTextureOGL(type, w, h, fmt, m_fbo_read); - - // NOTE: I'm 
not sure RenderTarget always need to be cleared. It could be costly for big upscale. - switch(type) - { - case GSTexture::RenderTarget: - ClearRenderTarget(t, 0); - break; - case GSTexture::DepthStencil: - ClearDepth(t, 0); - // No need to clear the stencil now. - break; - } - - GL_POP(); - return t; -} - -GSTexture* GSDeviceOGL::FetchSurface(int type, int w, int h, bool msaa, int format) -{ - return GSDevice::FetchSurface(type, w, h, false, format); -} - -bool GSDeviceOGL::Create(GSWnd* wnd) -{ - if (m_window == NULL) { - if (!GLLoader::check_gl_version(3, 3)) return false; - - if (!GLLoader::check_gl_supported_extension()) return false; - } - - m_window = wnd; - - // **************************************************************** - // Debug helper - // **************************************************************** -#ifdef ENABLE_OGL_DEBUG - if (theApp.GetConfig("debug_opengl", 0)) { - if (glDebugMessageCallback) { - glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL); - glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); - } - if (glDebugMessageControl) { - glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true); - // Useless info message on Nvidia driver - GLuint ids[] = {0x20004}; - glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, countof(ids), ids, false); - } - } -#endif - - // WARNING it must be done after the control setup (at least on MESA) - GL_PUSH("GSDeviceOGL::Create"); - - // **************************************************************** - // Various object - // **************************************************************** - GL_PUSH("GSDeviceOGL::Various"); - - m_shader = new GSShaderOGL(!!theApp.GetConfig("debug_glsl_shader", 0)); - - glGenFramebuffers(1, &m_fbo); - // Always write to the first buffer - OMSetFBO(m_fbo); - GLenum target[1] = {GL_COLOR_ATTACHMENT0}; - glDrawBuffers(1, target); - OMSetFBO(0); - - glGenFramebuffers(1, &m_fbo_read); - // Always read from the first buffer 
- glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - glReadBuffer(GL_COLOR_ATTACHMENT0); - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - - GL_POP(); - - // **************************************************************** - // Vertex buffer state - // **************************************************************** - GL_PUSH("GSDeviceOGL::Vertex Buffer"); - - ASSERT(sizeof(GSVertexPT1) == sizeof(GSVertex)); - GSInputLayoutOGL il_convert[] = - { - {2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) } , - {2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } , - {4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(8) } , - {1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } , - {2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } , - {1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } , - {2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(24) } , - {4 , GL_UNSIGNED_BYTE , GL_TRUE , sizeof(GSVertex) , (const GLvoid*)(28) } , // Only 1 byte is useful but hardware unit only support 4B - }; - m_va = new GSVertexBufferStateOGL(il_convert, countof(il_convert)); - - GL_POP(); - // **************************************************************** - // Pre Generate the different sampler object - // **************************************************************** - GL_PUSH("GSDeviceOGL::Sampler"); - - for (uint32 key = 0; key < countof(m_ps_ss); key++) { - m_ps_ss[key] = CreateSampler(PSSamplerSelector(key)); - } - - GL_POP(); - - // **************************************************************** - // convert - // **************************************************************** - GL_PUSH("GSDeviceOGL::Convert"); - - m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer)); - // Upload once and forget about it - ConvertConstantBuffer cb; - cb.ScalingFactor = GSVector4i(theApp.GetConfig("upscale_multiplier", 1)); - 
m_convert.cb->upload(&cb); - - m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl); - for(size_t i = 0; i < countof(m_convert.ps); i++) - m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl); - - PSSamplerSelector point; - m_convert.pt = GetSamplerID(point); - - PSSamplerSelector bilinear; - bilinear.ltf = true; - m_convert.ln = GetSamplerID(bilinear); - - m_convert.dss = new GSDepthStencilOGL(); - m_convert.dss_write = new GSDepthStencilOGL(); - m_convert.dss_write->EnableDepth(); - m_convert.dss_write->SetDepth(GL_ALWAYS, true); - - GL_POP(); - - // **************************************************************** - // merge - // **************************************************************** - GL_PUSH("GSDeviceOGL::Merge"); - - m_merge_obj.cb = new GSUniformBufferOGL(g_merge_cb_index, sizeof(MergeConstantBuffer)); - - for(size_t i = 0; i < countof(m_merge_obj.ps); i++) - m_merge_obj.ps[i] = m_shader->Compile("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, merge_glsl); - - GL_POP(); - - // **************************************************************** - // interlace - // **************************************************************** - GL_PUSH("GSDeviceOGL::Interlace"); - - m_interlace.cb = new GSUniformBufferOGL(g_interlace_cb_index, sizeof(InterlaceConstantBuffer)); - - for(size_t i = 0; i < countof(m_interlace.ps); i++) - m_interlace.ps[i] = m_shader->Compile("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, interlace_glsl); - - GL_POP(); - - // **************************************************************** - // Shade boost - // **************************************************************** - GL_PUSH("GSDeviceOGL::Shadeboost"); - - m_shadeboost.cb = new GSUniformBufferOGL(g_shadeboost_cb_index, sizeof(ShadeBoostConstantBuffer)); - - int ShadeBoost_Contrast = theApp.GetConfig("ShadeBoost_Contrast", 50); - int ShadeBoost_Brightness = 
theApp.GetConfig("ShadeBoost_Brightness", 50); - int ShadeBoost_Saturation = theApp.GetConfig("ShadeBoost_Saturation", 50); - std::string shade_macro = format("#define SB_SATURATION %d.0\n", ShadeBoost_Saturation) - + format("#define SB_BRIGHTNESS %d.0\n", ShadeBoost_Brightness) - + format("#define SB_CONTRAST %d.0\n", ShadeBoost_Contrast); - - m_shadeboost.ps = m_shader->Compile("shadeboost.glsl", "ps_main", GL_FRAGMENT_SHADER, shadeboost_glsl, shade_macro); - - GL_POP(); - - // **************************************************************** - // rasterization configuration - // **************************************************************** - GL_PUSH("GSDeviceOGL::Rasterization"); - -#ifdef ONLY_LINES - glLineWidth(5.0); - glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); -#else - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); -#endif - glDisable(GL_CULL_FACE); - glEnable(GL_SCISSOR_TEST); - glDisable(GL_MULTISAMPLE); - glDisable(GL_DITHER); // Honestly I don't know! - - GL_POP(); - - // **************************************************************** - // DATE - // **************************************************************** - GL_PUSH("GSDeviceOGL::Date"); - - m_date.dss = new GSDepthStencilOGL(); - m_date.dss->EnableStencil(); - m_date.dss->SetStencil(GL_ALWAYS, GL_REPLACE); - - GL_POP(); - // **************************************************************** - // Use DX coordinate convention - // **************************************************************** - - - // VS gl_position.z => [-1,-1] - // FS depth => [0, 1] - // because of -1 we loose lot of precision for small GS value - // This extension allow FS depth to range from -1 to 1. 
So - // gl_position.z could range from [0, 1] - if (GLLoader::found_GL_ARB_clip_control) { - // Change depth convention - glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - } - - // **************************************************************** - // HW renderer shader - // **************************************************************** - GL_PUSH("GSDeviceOGL::CreateTextureFX"); - - CreateTextureFX(); - - GL_POP(); - - // **************************************************************** - // Pbo Pool allocation - // **************************************************************** - GL_PUSH("GSDeviceOGL::PBO"); - - PboPool::Init(); - - GL_POP(); - - // Done ! - GL_POP(); - - // **************************************************************** - // Finish window setup and backbuffer - // **************************************************************** - if(!GSDevice::Create(wnd)) - return false; - - GSVector4i rect = wnd->GetClientRect(); - Reset(rect.z, rect.w); - - // Basic to ensure structures are correctly packed - ASSERT(sizeof(VSSelector) == 4); - ASSERT(sizeof(PSSelector) == 8); - ASSERT(sizeof(PSSamplerSelector) == 4); - ASSERT(sizeof(OMDepthStencilSelector) == 4); - ASSERT(sizeof(OMColorMaskSelector) == 4); - - return true; -} - -bool GSDeviceOGL::Reset(int w, int h) -{ - if(!GSDevice::Reset(w, h)) - return false; - - // Opengl allocate the backbuffer with the window. The render is done in the backbuffer when - // there isn't any FBO. 
Only a dummy texture is created to easily detect when the rendering is done - // in the backbuffer - m_backbuffer = new GSTextureOGL(GSTextureOGL::Backbuffer, w, h, 0, m_fbo_read); - - return true; -} - -void GSDeviceOGL::SetVSync(bool enable) -{ - m_wnd->SetVSync(enable); -} - -void GSDeviceOGL::Flip() -{ - #ifdef ENABLE_OGL_DEBUG - CheckDebugLog(); - #endif - - m_wnd->Flip(); -} - -void GSDeviceOGL::BeforeDraw() -{ - m_shader->UseProgram(); -} - -void GSDeviceOGL::AfterDraw() -{ -} - -void GSDeviceOGL::DrawPrimitive() -{ - BeforeDraw(); - m_va->DrawPrimitive(); - AfterDraw(); -} - -void GSDeviceOGL::DrawPrimitive(int offset, int count) -{ - BeforeDraw(); - m_va->DrawPrimitive(offset, count); - AfterDraw(); -} - -void GSDeviceOGL::DrawIndexedPrimitive() -{ - BeforeDraw(); - m_va->DrawIndexedPrimitive(); - AfterDraw(); -} - -void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) -{ - //ASSERT(offset + count <= (int)m_index.count); - - BeforeDraw(); - m_va->DrawIndexedPrimitive(offset, count); - AfterDraw(); -} - -void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) -{ - if (!t) return; - - GSTextureOGL* T = static_cast(t); - if (T->HasBeenCleaned() && !T->IsBackbuffer()) - return; - - GL_PUSH("Clear RT %d", T->GetID()); - - // TODO: check size of scissor before toggling it - glDisable(GL_SCISSOR_TEST); - - uint32 old_color_mask = GLState::wrgba; - OMSetColorMaskState(); - - if (T->IsBackbuffer()) { - OMSetFBO(0); - - // glDrawBuffer(GL_BACK); // this is the default when there is no FB - // 0 will select the first drawbuffer ie GL_BACK - glClearBufferfv(GL_COLOR, 0, c.v); - } else { - OMSetFBO(m_fbo); - OMAttachRt(T); - - glClearBufferfv(GL_COLOR, 0, c.v); - - } - - OMSetColorMaskState(OMColorMaskSelector(old_color_mask)); - - glEnable(GL_SCISSOR_TEST); - - T->WasCleaned(); - - GL_POP(); -} - -void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) -{ - if (!t) return; - - GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); - 
ClearRenderTarget(t, color); -} - -void GSDeviceOGL::ClearRenderTarget_i(GSTexture* t, int32 c) -{ - if (!t) return; - - GSTextureOGL* T = static_cast(t); - - GL_PUSH("Clear RTi %d", T->GetID()); - - uint32 old_color_mask = GLState::wrgba; - OMSetColorMaskState(); - - // Keep SCISSOR_TEST enabled on purpose to reduce the size - // of clean in DATE (impact big upscaling) - int32 col[4] = {c, c, c, c}; - - OMSetFBO(m_fbo); - OMAttachRt(T); - - // Blending is not supported when you render to an Integer texture - if (GLState::blend) { - glDisable(GL_BLEND); - } - - glClearBufferiv(GL_COLOR, 0, col); - - OMSetColorMaskState(OMColorMaskSelector(old_color_mask)); - - if (GLState::blend) { - glEnable(GL_BLEND); - } - - GL_POP(); -} - -void GSDeviceOGL::ClearDepth(GSTexture* t, float c) -{ - if (!t) return; - - GSTextureOGL* T = static_cast(t); - - GL_PUSH("Clear Depth %d", T->GetID()); - - OMSetFBO(m_fbo); - OMAttachDs(T); - - // TODO: check size of scissor before toggling it - glDisable(GL_SCISSOR_TEST); - if (GLState::depth_mask) { - glClearBufferfv(GL_DEPTH, 0, &c); - } else { - glDepthMask(true); - glClearBufferfv(GL_DEPTH, 0, &c); - glDepthMask(false); - } - glEnable(GL_SCISSOR_TEST); - - GL_POP(); -} - -void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) -{ - if (!t) return; - - GSTextureOGL* T = static_cast(t); - - GL_PUSH("Clear Stencil %d", T->GetID()); - - // Keep SCISSOR_TEST enabled on purpose to reduce the size - // of clean in DATE (impact big upscaling) - OMSetFBO(m_fbo); - OMAttachDs(T); - GLint color = c; - - glClearBufferiv(GL_STENCIL, 0, &color); - - GL_POP(); -} - -GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel) -{ - return CreateSampler(sel.ltf, sel.tau, sel.tav, sel.aniso); -} - -GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav, bool aniso) -{ - GL_PUSH("Create Sampler"); - - GLuint sampler; - glGenSamplers(1, &sampler); - if (bilinear) { - glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - 
glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } else { - glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - - if (tau) - glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_REPEAT); - else - glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - if (tav) - glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_REPEAT); - else - glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, 0); - glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, 6); - - int anisotropy = theApp.GetConfig("MaxAnisotropy", 0); - if (GLLoader::found_GL_EXT_texture_filter_anisotropic && anisotropy && aniso) - glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, (float)anisotropy); - - GL_POP(); - return sampler; -} - -void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt) -{ - const GSVector2i& rtsize = rt->GetSize(); - - // Create a texture to avoid the useless clean@0 - if (m_date.t == NULL) - m_date.t = CreateTexture(rtsize.x, rtsize.y, GL_R32I); - - // Clean with the max signed value - ClearRenderTarget_i(m_date.t, 0x7FFFFFFF); - - glBindImageTexture(2, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I); -#ifdef ENABLE_OGL_DEBUG - // Help to see the texture in apitrace - PSSetShaderResource(2, m_date.t); -#endif -} - -void GSDeviceOGL::RecycleDateTexture() -{ - if (m_date.t) { - //static_cast(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", s_n)); - - Recycle(m_date.t); - m_date.t = NULL; - } -} - -void GSDeviceOGL::Barrier(GLbitfield b) -{ - glMemoryBarrier(b); -} - -/* Note: must be here because tfx_glsl is static */ -GLuint GSDeviceOGL::CompileVS(VSSelector sel, int logz) -{ - std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) - + format("#define VS_LOGZ %d\n", logz) - + format("#define VS_WILDHACK %d\n", 
sel.wildhack) - ; - - return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, macro); -} - -/* Note: must be here because tfx_glsl is static */ -GLuint GSDeviceOGL::CompileGS(GSSelector sel) -{ - std::string macro = format("#define GS_POINT %d\n", sel.point); - - return m_shader->Compile("tfx_vgs.glsl", "gs_main", GL_GEOMETRY_SHADER, tfx_vgs_glsl, macro); -} - -/* Note: must be here because tfx_glsl is static */ -GLuint GSDeviceOGL::CompilePS(PSSelector sel) -{ - std::string macro = format("#define PS_FST %d\n", sel.fst) - + format("#define PS_WMS %d\n", sel.wms) - + format("#define PS_WMT %d\n", sel.wmt) - + format("#define PS_TEX_FMT %d\n", sel.tex_fmt) - + format("#define PS_DFMT %d\n", sel.dfmt) - + format("#define PS_AEM %d\n", sel.aem) - + format("#define PS_TFX %d\n", sel.tfx) - + format("#define PS_TCC %d\n", sel.tcc) - + format("#define PS_ATST %d\n", sel.atst) - + format("#define PS_FOG %d\n", sel.fog) - + format("#define PS_CLR1 %d\n", sel.clr1) - + format("#define PS_FBA %d\n", sel.fba) - + format("#define PS_LTF %d\n", sel.ltf) - + format("#define PS_COLCLIP %d\n", sel.colclip) - + format("#define PS_DATE %d\n", sel.date) - + format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack) - //+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler) - + format("#define PS_BLEND_A %d\n", sel.blend_a) - + format("#define PS_BLEND_B %d\n", sel.blend_b) - + format("#define PS_BLEND_C %d\n", sel.blend_c) - + format("#define PS_BLEND_D %d\n", sel.blend_d) - + format("#define PS_IIP %d\n", sel.iip) - + format("#define PS_SHUFFLE %d\n", sel.shuffle) - + format("#define PS_READ_BA %d\n", sel.read_ba) - + format("#define PS_WRITE_RG %d\n", sel.write_rg) - + format("#define PS_FBMASK %d\n", sel.fbmask) - + format("#define PS_HDR %d\n", sel.hdr) - + format("#define PS_PABE %d\n", sel.pabe); - ; - - return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); -} - -void GSDeviceOGL::SelfShaderTest() -{ 
-#define RUN_TEST \ - do { \ - GLuint p = CompilePS(sel); \ - nb_shader++; \ - perf += m_shader->DumpAsm(file, p); \ - m_shader->Delete(p); \ - } while(0); - -#define PRINT_TEST(s) \ - do { \ - fprintf(stderr, "%s %d instructions for %d shaders (mean of %4.2f)\n", \ - s, perf, nb_shader, (float)perf/(float)nb_shader); \ - all += perf; \ - perf = 0; \ - nb_shader = 0; \ - } while(0); - - int nb_shader = 0; - int perf = 0; - int all = 0; - // Test: SW blending - for (int colclip = 0; colclip < 2; colclip++) { - for (int fmt = 0; fmt < 3; fmt++) { - for (int i = 0; i < 3; i++) { - PSSelector sel; - sel.atst = 1; - sel.tfx = 4; - - int ib = (i + 1) % 3; - sel.blend_a = i; - sel.blend_b = ib;; - sel.blend_c = i; - sel.blend_d = i; - sel.colclip = colclip; - sel.dfmt = fmt; - - std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm", - i, ib, i, i, colclip, fmt); - RUN_TEST; - } - } - } - PRINT_TEST("Blend"); - - // Test: alpha test - for (int atst = 0; atst < 8; atst++) { - PSSelector sel; - sel.tfx = 4; - - sel.atst = atst; - std::string file = format("Shader_Atst_%d.glsl.asm", atst); - RUN_TEST; - } - PRINT_TEST("Alpha Tst"); - - // Test: fbmask/fog/shuffle/read_ba - for (int read_ba = 0; read_ba < 2; read_ba++) { - PSSelector sel; - sel.tfx = 4; - sel.atst = 1; - - sel.fog = 1; - sel.fbmask = 1; - sel.shuffle = 1; - sel.read_ba = read_ba; - - std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba); - RUN_TEST; - } - PRINT_TEST("Fbmask/fog/shuffle/read_ba"); - - // Test: Date - for (int date = 1; date < 7; date++) { - PSSelector sel; - sel.tfx = 4; - sel.atst = 1; - - sel.date = date; - std::string file = format("Shader_Date_%d.glsl.asm", date); - RUN_TEST; - } - PRINT_TEST("Date"); - - // Test: FBA - for (int fmt = 0; fmt < 3; fmt++) { - PSSelector sel; - sel.tfx = 4; - sel.atst = 1; - - sel.fba = 1; - sel.dfmt = fmt; - sel.clr1 = 1; - std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt); - 
RUN_TEST; - } - PRINT_TEST("Fba/Clr1/Dfmt"); - - // Test: Fst/Tc/IIP - { - PSSelector sel; - sel.tfx = 1; - sel.atst = 1; - - sel.fst = 0; - sel.iip = 1; - sel.tcoffsethack = 1; - - std::string file = format("Shader_Fst__TC__Iip.glsl.asm"); - RUN_TEST; - } - PRINT_TEST("Fst/Tc/IIp"); - - // Test: tfx/tcc - for (int tfx = 0; tfx < 5; tfx++) { - for (int tcc = 0; tcc < 2; tcc++) { - PSSelector sel; - sel.atst = 1; - sel.fst = 1; - - sel.tfx = tfx; - sel.tcc = tcc; - std::string file = format("Shader_Tfx_%d__Tcc_%d.glsl.asm", tfx, tcc); - RUN_TEST; - } - } - PRINT_TEST("Tfx/Tcc"); - - // Test: Texture Sampling - for (int fmt = 0; fmt < 16; fmt++) { - if ((fmt & 3) == 3) continue; - - for (int ltf = 0; ltf < 2; ltf++) { - for (int aem = 0; aem < 2; aem++) { - for (int wms = 1; wms < 4; wms++) { - for (int wmt = 1; wmt < 4; wmt++) { - PSSelector sel; - sel.atst = 1; - sel.tfx = 1; - sel.tcc = 1; - sel.fst = 1; - - sel.ltf = ltf; - sel.aem = aem; - sel.tex_fmt = fmt; - sel.wms = wms; - sel.wmt = wmt; - std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d.glsl.asm", - ltf, aem, fmt, wms, wmt); - RUN_TEST; - } - } - } - } - } - PRINT_TEST("Texture Sampling"); - - fprintf(stderr, "\nTotal %d\n", all); - -#undef RUN_TEST -#undef PRINT_TEST -} - -GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) -{ - return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8); -} - -GSTexture* GSDeviceOGL::CreateDepthStencil(int w, int h, bool msaa, int format) -{ - return GSDevice::CreateDepthStencil(w, h, msaa, format ? format : GL_DEPTH32F_STENCIL8); -} - -GSTexture* GSDeviceOGL::CreateTexture(int w, int h, int format) -{ - return GSDevice::CreateTexture(w, h, format ? format : GL_RGBA8); -} - -GSTexture* GSDeviceOGL::CreateOffscreen(int w, int h, int format) -{ - return GSDevice::CreateOffscreen(w, h, format ? 
format : GL_RGBA8); -} - -// blit a texture into an offscreen buffer -GSTexture* GSDeviceOGL::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format, int ps_shader) -{ - if (format == 0) - format = GL_RGBA8; - - ASSERT(src); - ASSERT(format == GL_RGBA8 || format == GL_R16UI || format == GL_R32UI); - - GSTexture* dst = CreateOffscreen(w, h, format); - - GSVector4 dRect(0, 0, w, h); - - StretchRect(src, sRect, dst, dRect, m_convert.ps[ps_shader]); - - return dst; -} - -// Copy a sub part of texture (same as below but force a conversion) -void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin) -{ - ASSERT(sTex && dTex); - if (!(sTex && dTex)) - return; - - const GLuint& sid = static_cast(sTex)->GetID(); - const GLuint& did = static_cast(dTex)->GetID(); - - GL_PUSH(format("CopyRectConv from %d to %d", sid, did).c_str()); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0); - if (at_origin) - glCopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height()); - else - glCopyTextureSubImage2D(did, GL_TEX_LEVEL_0, r.x, r.y, r.x, r.y, r.width(), r.height()); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - - GL_POP(); -} - -// Copy a sub part of a texture into another -void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) -{ - ASSERT(sTex && dTex); - if (!(sTex && dTex)) - return; - - const GLuint& sid = static_cast(sTex)->GetID(); - const GLuint& did = static_cast(dTex)->GetID(); - - GL_PUSH("CopyRect from %d to %d", sid, did); - - if (GLLoader::found_GL_ARB_copy_image) { - glCopyImageSubData( sid, GL_TEXTURE_2D, - 0, r.x, r.y, 0, - did, GL_TEXTURE_2D, - 0, 0, 0, 0, - r.width(), r.height(), 1); - } else { - // Slower copy (conversion is done) - CopyRectConv(sTex, dTex, r, true); - } - - GL_POP(); -} - -void GSDeviceOGL::StretchRect(GSTexture* sTex, const 
GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[shader], linear); -} - -void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear) -{ - StretchRect(sTex, sRect, dTex, dRect, ps, m_NO_BLEND, linear); -} - -void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, bool linear) -{ - if(!sTex || !dTex) - { - ASSERT(0); - return; - } - - bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] || - ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); - - // Performance optimization. It might be faster to use a framebuffer blit for standard case - // instead to emulate it with shader - // see https://www.opengl.org/wiki/Framebuffer#Blitting - - GL_PUSH("StretchRect from %d to %d", sTex->GetID(), dTex->GetID()); - - // ************************************ - // Init - // ************************************ - - BeginScene(); - - GSVector2i ds = dTex->GetSize(); - - m_shader->VS(m_convert.vs); - m_shader->GS(0); - m_shader->PS(ps); - - // ************************************ - // om - // ************************************ - - if (draw_in_depth) - OMSetDepthStencilState(m_convert.dss_write); - else - OMSetDepthStencilState(m_convert.dss); - - if (draw_in_depth) - OMSetRenderTargets(NULL, dTex); - else - OMSetRenderTargets(dTex, NULL); - - OMSetBlendState(bs); - OMSetColorMaskState(); - - // ************************************ - // ia - // ************************************ - - - // Original code from DX - float left = dRect.x * 2 / ds.x - 1.0f; - float right = dRect.z * 2 / ds.x - 1.0f; -#if 0 - float top = 1.0f - dRect.y * 2 / ds.y; - float bottom = 1.0f - dRect.w * 2 / ds.y; -#else - // Opengl get some 
issues with the coordinate - // I flip top/bottom to fix scaling of the internal resolution - float top = -1.0f + dRect.y * 2 / ds.y; - float bottom = -1.0f + dRect.w * 2 / ds.y; -#endif - - // Flip y axis only when we render in the backbuffer - // By default everything is render in the wrong order (ie dx). - // 1/ consistency between several pass rendering (interlace) - // 2/ in case some GSdx code expect thing in dx order. - // Only flipping the backbuffer is transparent (I hope)... - GSVector4 flip_sr = sRect; - if (static_cast(dTex)->IsBackbuffer()) { - flip_sr.y = sRect.w; - flip_sr.w = sRect.y; - } - - GSVertexPT1 vertices[] = - { - {GSVector4(left , top , 0.0f, 0.0f) , GSVector2(flip_sr.x , flip_sr.y)} , - {GSVector4(right , top , 0.0f, 0.0f) , GSVector2(flip_sr.z , flip_sr.y)} , - {GSVector4(left , bottom, 0.0f, 0.0f) , GSVector2(flip_sr.x , flip_sr.w)} , - {GSVector4(right , bottom, 0.0f, 0.0f) , GSVector2(flip_sr.z , flip_sr.w)} , - }; - - IASetVertexBuffer(vertices, 4); - IASetPrimitiveTopology(GL_TRIANGLE_STRIP); - - // ************************************ - // Texture - // ************************************ - - PSSetShaderResource(0, sTex); - PSSetSamplerState(linear ? m_convert.ln : m_convert.pt); - - // ************************************ - // Draw - // ************************************ - DrawPrimitive(); - - // ************************************ - // End - // ************************************ - - EndScene(); - - GL_POP(); -} - -void GSDeviceOGL::DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) -{ - GL_PUSH("DoMerge"); - - OMSetColorMaskState(); - - ClearRenderTarget(dTex, c); - - if(sTex[1] && !slbg) - { - StretchRect(sTex[1], sRect[1], dTex, dRect[1], m_merge_obj.ps[0]); - } - - if(sTex[0]) - { - m_merge_obj.cb->upload(&c.v); - - StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[mmod ? 
1 : 0], m_MERGE_BLEND); - } - - GL_POP(); -} - -void GSDeviceOGL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) -{ - GL_PUSH("DoInterlace"); - - OMSetColorMaskState(); - - GSVector4 s = GSVector4(dTex->GetSize()); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); - - InterlaceConstantBuffer cb; - - cb.ZrH = GSVector2(0, 1.0f / s.y); - cb.hH = s.y / 2; - - m_interlace.cb->upload(&cb); - - StretchRect(sTex, sRect, dTex, dRect, m_interlace.ps[shader], linear); - - GL_POP(); -} - -void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) -{ - // Lazy compile - if (!m_fxaa.ps) { - if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension - return; - } - - std::string fxaa_macro = "#define FXAA_GLSL_130 1\n"; - fxaa_macro += "#extension GL_ARB_gpu_shader5 : enable\n"; - m_fxaa.ps = m_shader->Compile("fxaa.fx", "ps_main", GL_FRAGMENT_SHADER, fxaa_fx, fxaa_macro); - } - - GL_PUSH("DoFxaa"); - - OMSetColorMaskState(); - - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, true); - - GL_POP(); -} - -void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) -{ - // Lazy compile - if (!m_shaderfx.ps) { - if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension - return; - } - - std::string config_name(theApp.GetConfig("shaderfx_conf", "dummy.ini")); - std::ifstream fconfig(config_name); - std::stringstream config; - if (fconfig.good()) - config << fconfig.rdbuf(); - else - fprintf(stderr, "Warning failed to load '%s'. External Shader might be wrongly configured\n", config_name.c_str()); - - std::string shader_name(theApp.GetConfig("shaderfx_glsl", "dummy.glsl")); - std::ifstream fshader(shader_name); - std::stringstream shader; - if (!fshader.good()) { - fprintf(stderr, "Error failed to load '%s'. 
External Shader will be disabled !\n", shader_name.c_str()); - return; - } - shader << fshader.rdbuf(); - - - m_shaderfx.cb = new GSUniformBufferOGL(g_fx_cb_index, sizeof(ExternalFXConstantBuffer)); - m_shaderfx.ps = m_shader->Compile("Extra", "ps_main", GL_FRAGMENT_SHADER, shader.str().c_str(), config.str()); - } - - GL_PUSH("DoExternalFX"); - - OMSetColorMaskState(); - - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ExternalFXConstantBuffer cb; - - cb.xyFrame = GSVector2(s.x, s.y); - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - m_shaderfx.cb->upload(&cb); - - StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, true); - - GL_POP(); -} - -void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex) -{ - GL_PUSH("DoShadeBoost"); - - OMSetColorMaskState(); - - GSVector2i s = dTex->GetSize(); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0, 0, s.x, s.y); - - ShadeBoostConstantBuffer cb; - - cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); - cb.rcpFrameOpt = GSVector4::zero(); - - m_shadeboost.cb->upload(&cb); - - StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, true); - - GL_POP(); -} - -void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) -{ - GL_PUSH("DATE First Pass"); - - // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows - - BeginScene(); - - ClearStencil(ds, 0); - - m_shader->VS(m_convert.vs); - m_shader->GS(0); - m_shader->PS(m_convert.ps[datm ? 
ShaderConvert_DATM_1 : ShaderConvert_DATM_0]); - - // om - - OMSetDepthStencilState(m_date.dss); - if (GLState::blend) { - glDisable(GL_BLEND); - } - OMSetRenderTargets(NULL, ds, &GLState::scissor); - - // ia - - IASetVertexBuffer(vertices, 4); - IASetPrimitiveTopology(GL_TRIANGLE_STRIP); - - - // Texture - - PSSetShaderResource(0, rt); - PSSetSamplerState(m_convert.pt); - - DrawPrimitive(); - - if (GLState::blend) { - glEnable(GL_BLEND); - } - - EndScene(); - - GL_POP(); -} - -void GSDeviceOGL::EndScene() -{ - m_va->EndScene(); -} - -void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count) -{ - m_va->UploadVB(vertices, count); -} - -void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count) -{ - m_va->UploadIB(index, count); -} - -void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology) -{ - m_va->SetTopology(topology); -} - -void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr) -{ - ASSERT(i < (int)countof(GLState::tex_unit)); - // Note: Nvidia debgger doesn't support the id 0 (ie the NULL texture) - if (sr) { - GLuint id = static_cast(sr)->GetID(); - if (GLState::tex_unit[i] != id) { - GLState::tex_unit[i] = id; - glBindTextureUnit(i, id); - } - } -} - -void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) -{ - PSSetShaderResource(0, sr0); - PSSetShaderResource(1, sr1); -} - -void GSDeviceOGL::PSSetSamplerState(GLuint ss) -{ - if (GLState::ps_ss != ss) { - GLState::ps_ss = ss; - glBindSampler(0, ss); - } -} - -void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) -{ - GLuint id; - if (rt) { - rt->WasAttached(); - id = rt->GetID(); - } else { - id = 0; - } - - if (GLState::rt != id) { - GLState::rt = id; - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, id, 0); - } -} - -void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) -{ - GLuint id; - if (ds) { - ds->WasAttached(); - id = ds->GetID(); - } else { - id = 0; - } - - if (GLState::ds != id) { - GLState::ds = id; - 
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); - } -} - -void GSDeviceOGL::OMSetFBO(GLuint fbo) -{ - if (GLState::fbo != fbo) { - GLState::fbo = fbo; - glBindFramebuffer(GL_FRAMEBUFFER, fbo); - } -} - -void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss) -{ - dss->SetupDepth(); - dss->SetupStencil(); -} - -void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) -{ - if (sel.wrgba != GLState::wrgba) { - GLState::wrgba = sel.wrgba; - - glColorMaski(0, sel.wr, sel.wg, sel.wb, sel.wa); - } -} - -void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant) -{ - if (blend_index) { - if (!GLState::blend) { - GLState::blend = true; - glEnable(GL_BLEND); - } - - if (is_blend_constant && GLState::bf != blend_factor) { - GLState::bf = blend_factor; - float bf = (float)blend_factor / 128.0f; - gl_BlendColor(bf, bf, bf, bf); - } - - const OGLBlend& b = m_blendMapOGL[blend_index]; - - if (GLState::eq_RGB != b.op) { - GLState::eq_RGB = b.op; - if (glBlendEquationSeparateiARB) - glBlendEquationSeparateiARB(0, b.op, GL_FUNC_ADD); - else - glBlendEquationSeparate(b.op, GL_FUNC_ADD); - } - - if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) { - GLState::f_sRGB = b.src; - GLState::f_dRGB = b.dst; - if (glBlendFuncSeparateiARB) - glBlendFuncSeparateiARB(0, b.src, b.dst, GL_ONE, GL_ZERO); - else - glBlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO); - } - - } else { - if (GLState::blend) { - GLState::blend = false; - glDisable(GL_BLEND); - } - } -} - -void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) -{ - GSTextureOGL* RT = static_cast(rt); - GSTextureOGL* DS = static_cast(ds); - - if (rt == NULL || !RT->IsBackbuffer()) { - OMSetFBO(m_fbo); - if (rt) { - OMAttachRt(RT); - } else { - OMAttachRt(); - } - - // Note: it must be done after OMSetFBO - if (ds) - OMAttachDs(DS); - else - OMAttachDs(); - - } else { - // Render in the 
backbuffer - OMSetFBO(0); - } - - - GSVector2i size = rt ? rt->GetSize() : ds ? ds->GetSize() : GLState::viewport; - if(GLState::viewport != size) - { - GLState::viewport = size; - // FIXME ViewportIndexedf or ViewportIndexedfv (GL4.1) - glViewport(0, 0, size.x, size.y); - } - - GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy(); - - if(!GLState::scissor.eq(r)) - { - GLState::scissor = r; - // FIXME ScissorIndexedv (GL4.1) - glScissor( r.x, r.y, r.width(), r.height() ); - } -} - -void GSDeviceOGL::CheckDebugLog() -{ - if (!m_debug_gl_call) return; - - unsigned int count = 16; // max. num. of messages that will be read from the log - int bufsize = 2048; - unsigned int sources[16] = {}; - unsigned int types[16] = {}; - unsigned int ids[16] = {}; - unsigned int severities[16] = {}; - int lengths[16] = {}; - char* messageLog = new char[bufsize]; - - unsigned int retVal = glGetDebugMessageLogARB(count, bufsize, sources, types, ids, severities, lengths, messageLog); - - if(retVal > 0) - { - unsigned int pos = 0; - for(unsigned int i=0; i= 0 ? 
gl_length : strlen(gl_message)); - std::string type, severity, source; - static int sev_counter = 0; - switch(gl_type) { - case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break; - case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB : type = "Deprecated bhv"; break; - case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB : type = "Undefined bhv"; break; - case GL_DEBUG_TYPE_PORTABILITY_ARB : type = "Portability"; break; - case GL_DEBUG_TYPE_PERFORMANCE_ARB : type = "Perf"; break; - case GL_DEBUG_TYPE_OTHER_ARB : type = "Others"; break; - case GL_DEBUG_TYPE_PUSH_GROUP : return; // Don't print message injected by myself - case GL_DEBUG_TYPE_POP_GROUP : return; // Don't print message injected by myself - default : type = "TTT"; break; - } - switch(gl_severity) { - case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break; - case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break; - case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break; - default : severity = "Info"; break; - } - switch(gl_source) { - case GL_DEBUG_SOURCE_API_ARB : source = "API"; break; - case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB : source = "WINDOW"; break; - case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB : source = "COMPILER"; break; - case GL_DEBUG_SOURCE_THIRD_PARTY_ARB : source = "3rdparty"; break; - case GL_DEBUG_SOURCE_APPLICATION_ARB : source = "Application"; break; - case GL_DEBUG_SOURCE_OTHER_ARB : source = "Others"; break; - default : source = "???"; break; - } - - #ifdef _DEBUG - // Don't spam noisy information on the terminal - if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION) { - fprintf(stderr,"Type:%s\tID:%d\tSeverity:%s\tMessage:%s\n", type.c_str(), s_n, severity.c_str(), message.c_str()); - } - #endif - - if (m_debug_gl_file) - fprintf(m_debug_gl_file,"Type:%s\tID:%d\tSeverity:%s\tMessage:%s\n", type.c_str(), s_n, severity.c_str(), message.c_str()); - -#ifdef _DEBUG - if (sev_counter >= 5) { - // Close the file to flush the content on disk before exiting. 
- if (m_debug_gl_file) { - fclose(m_debug_gl_file); - m_debug_gl_file = NULL; - } - ASSERT(0); - } -#endif -} - -// (A - B) * C + D -// A: Cs/Cd/0 -// B: Cs/Cd/0 -// C: As/Ad/FIX -// D: Cs/Cd/0 - -// bogus: 0100, 0110, 0120, 0200, 0210, 0220, 1001, 1011, 1021 -// tricky: 1201, 1211, 1221 - -// Source.rgb = float3(1, 1, 1); -// 1201 Cd*(1 + As) => Source * Dest color + Dest * Source alpha -// 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha -// 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor - -// Special blending method table: -// # (tricky) => 1 * Cd + Cd * F => Use (Cd, F) as factor of color (1, Cd) -// * (bogus) => C * (1 + F ) + ... => factor is always bigger than 1 (except above case) -// ? => Cs * F + Cd => do the multiplication in shader and addition in blending unit. It is an optimization - -// Copy Dx blend table and convert it to ogl -#define D3DBLENDOP_ADD GL_FUNC_ADD -#define D3DBLENDOP_SUBTRACT GL_FUNC_SUBTRACT -#define D3DBLENDOP_REVSUBTRACT GL_FUNC_REVERSE_SUBTRACT - -#define D3DBLEND_ONE GL_ONE -#define D3DBLEND_ZERO GL_ZERO -#define D3DBLEND_INVDESTALPHA GL_ONE_MINUS_DST_ALPHA -#define D3DBLEND_DESTALPHA GL_DST_ALPHA -#define D3DBLEND_DESTCOLOR GL_DST_COLOR -#define D3DBLEND_BLENDFACTOR GL_CONSTANT_COLOR -#define D3DBLEND_INVBLENDFACTOR GL_ONE_MINUS_CONSTANT_COLOR - -#define D3DBLEND_SRCALPHA GL_SRC1_ALPHA -#define D3DBLEND_INVSRCALPHA GL_ONE_MINUS_SRC1_ALPHA - -const int GSDeviceOGL::m_NO_BLEND = 0; -const int GSDeviceOGL::m_MERGE_BLEND = 3*3*3*3; - -const GSDeviceOGL::OGLBlend GSDeviceOGL::m_blendMapOGL[3*3*3*3 + 1] = -{ - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs - { 0 , 
D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0 - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As - { 0 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_INVSRCALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad - { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_INVDESTALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F - { 0 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F - { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) - { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0201: (Cs - 0)*As + Cd ==> Cs*As + Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As - { BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + 
Cs ==> Cs*(Ad + 1) - { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd - { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad - { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0221: (Cs - 0)*F + Cd ==> Cs*F + Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F - { 0 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As - { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As - { 0 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_DESTALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad - { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad - { 0 , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F - { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , 
D3DBLEND_ONE , D3DBLEND_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 - { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As - { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_SRCALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As - { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad - { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_DESTALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_DESTALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad - { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F - { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_BLENDFACTOR} , //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_BLENDFACTOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) - { BLEND_ACCU , D3DBLENDOP_REVSUBTRACT , D3DBLEND_ONE , D3DBLEND_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As - { BLEND_NO_BAR , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As - { 0 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) 
- { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad - { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) - { BLEND_ACCU , D3DBLENDOP_REVSUBTRACT , D3DBLEND_ONE , D3DBLEND_ONE} , // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F - { BLEND_NO_BAR , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVSRCALPHA} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVDESTALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVBLENDFACTOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) - { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2211: (0 - 
0)*Ad + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0 - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs - { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd - { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0 - { 0 , D3DBLENDOP_ADD , GL_SRC_ALPHA , GL_ONE_MINUS_SRC_ALPHA} , // extra for merge operation -}; diff --git a/plugins/GSdx_legacy/GSDeviceOGL.h b/plugins/GSdx_legacy/GSDeviceOGL.h deleted file mode 100644 index 2199bd1d22..0000000000 --- a/plugins/GSdx_legacy/GSDeviceOGL.h +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDevice.h" -#include "GSTextureOGL.h" -#include "GSdx.h" -#include "GSVertexArrayOGL.h" -#include "GSUniformBufferOGL.h" -#include "GSShaderOGL.h" -#include "GLState.h" - -// A couple of flag to determine the blending behavior -#define BLEND_A_MAX (0x100) // Impossible blending uses coeff bigger than 1 -#define BLEND_C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) -#define BLEND_NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) -#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds - -#ifdef ENABLE_OGL_DEBUG_MEM_BW -extern uint64 g_real_texture_upload_byte; -extern uint64 g_vertex_upload_byte; -#endif - -class GSDepthStencilOGL { - bool m_depth_enable; - GLenum m_depth_func; - bool m_depth_mask; - // Note front face and back might be split but it seems they have same parameter configuration - bool m_stencil_enable; - GLenum m_stencil_func; - GLenum m_stencil_spass_dpass_op; - -public: - - GSDepthStencilOGL() : m_depth_enable(false) - , m_depth_func(GL_ALWAYS) - , m_depth_mask(0) - , m_stencil_enable(false) - , m_stencil_func(0) - , m_stencil_spass_dpass_op(GL_KEEP) - { - } - - void EnableDepth() { m_depth_enable = true; } - void EnableStencil() { m_stencil_enable = true; } - - void SetDepth(GLenum func, bool mask) { m_depth_func = func; m_depth_mask = mask; } - void SetStencil(GLenum func, GLenum pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; } - - void SetupDepth() - { - if (GLState::depth != m_depth_enable) { - GLState::depth = m_depth_enable; - if (m_depth_enable) - glEnable(GL_DEPTH_TEST); - else - glDisable(GL_DEPTH_TEST); - } - - if (m_depth_enable) { - if (GLState::depth_func != m_depth_func) { - GLState::depth_func = m_depth_func; - glDepthFunc(m_depth_func); - } - if (GLState::depth_mask != m_depth_mask) { - 
GLState::depth_mask = m_depth_mask; - glDepthMask((GLboolean)m_depth_mask); - } - } - } - - void SetupStencil() - { - if (GLState::stencil != m_stencil_enable) { - GLState::stencil = m_stencil_enable; - if (m_stencil_enable) - glEnable(GL_STENCIL_TEST); - else - glDisable(GL_STENCIL_TEST); - } - - if (m_stencil_enable) { - // Note: here the mask control which bitplane is considered by the operation - if (GLState::stencil_func != m_stencil_func) { - GLState::stencil_func = m_stencil_func; - glStencilFunc(m_stencil_func, 1, 1); - } - if (GLState::stencil_pass != m_stencil_spass_dpass_op) { - GLState::stencil_pass = m_stencil_spass_dpass_op; - glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op); - } - } - } - - bool IsMaskEnable() { return m_depth_mask != GL_FALSE; } -}; - -class GSDeviceOGL final : public GSDevice -{ - public: - __aligned(struct, 32) VSConstantBuffer - { - GSVector4 Vertex_Scale_Offset; - GSVector4 TextureScale; - - VSConstantBuffer() - { - Vertex_Scale_Offset = GSVector4::zero(); - TextureScale = GSVector4::zero(); - } - - __forceinline bool Update(const VSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - if(!((a[0] == b[0]) & (a[1] == b[1])).alltrue()) - { - a[0] = b[0]; - a[1] = b[1]; - - return true; - } - - return false; - } - }; - - struct VSSelector - { - union - { - struct - { - uint32 wildhack:1; - uint32 bppz:2; - - uint32 _free:29; - }; - - uint32 key; - }; - - operator uint32() {return key;} - - VSSelector() : key(0) {} - VSSelector(uint32 k) : key(k) {} - }; - - struct GSSelector - { - union - { - struct - { - uint32 sprite:1; - uint32 point:1; - - uint32 _free:30; - }; - - uint32 key; - }; - - operator uint32() {return key;} - - GSSelector() : key(0) {} - GSSelector(uint32 k) : key(k) {} - }; - - __aligned(struct, 32) PSConstantBuffer - { - GSVector4 FogColor_AREF; - GSVector4 WH; - GSVector4 TA_Af; - GSVector4i MskFix; - GSVector4i FbMask; - - GSVector4 HalfTexel; - GSVector4 
MinMax; - GSVector4 TC_OH_TS; - - PSConstantBuffer() - { - FogColor_AREF = GSVector4::zero(); - HalfTexel = GSVector4::zero(); - WH = GSVector4::zero(); - MinMax = GSVector4::zero(); - MskFix = GSVector4i::zero(); - TC_OH_TS = GSVector4::zero(); - FbMask = GSVector4i::zero(); - } - - __forceinline bool Update(const PSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - // if WH matches both HalfTexel and TC_OH_TS do too - // MinMax depends on WH and MskFix so no need to check it too - if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])).alltrue()) - { - // Note previous check uses SSE already, a plain copy will be faster than any memcpy - a[0] = b[0]; - a[1] = b[1]; - a[2] = b[2]; - a[3] = b[3]; - a[4] = b[4]; - a[5] = b[5]; - - return true; - } - - return false; - } - }; - - struct PSSelector - { - // Performance note: there are too many shader combinations - // It might hurt the performance due to frequent toggling worse it could consume - // a lots of memory. - union - { - struct - { - // *** Word 1 - // Format - uint32 tex_fmt:4; - uint32 dfmt:2; - // Alpha extension/Correction - uint32 aem:1; - uint32 fba:1; - // Fog - uint32 fog:1; - // Flat/goround shading - uint32 iip:1; - // Pixel test - uint32 date:3; - uint32 atst:3; - // Color sampling - uint32 fst:1; // Investigate to do it on the VS - uint32 tfx:3; - uint32 tcc:1; - uint32 wms:2; - uint32 wmt:2; - uint32 ltf:1; - // Shuffle and fbmask effect - uint32 shuffle:1; - uint32 read_ba:1; - uint32 write_rg:1; - uint32 fbmask:1; - - uint32 _free1:2; - - // *** Word 2 - // Blend and Colclip - uint32 blend_a:2; - uint32 blend_b:2; - uint32 blend_c:2; - uint32 blend_d:2; - uint32 clr1:1; // useful? - uint32 pabe:1; - uint32 hdr:1; - uint32 colclip:1; - - // Hack - uint32 tcoffsethack:1; - - uint32 _free2:19; - }; - - uint64 key; - }; - - // FIXME is the & useful ? 
- operator uint64() {return key;} - - PSSelector() : key(0) {} - }; - - struct PSSamplerSelector - { - union - { - struct - { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; - uint32 aniso:1; - - uint32 _free:28; - }; - - uint32 key; - }; - - operator uint32() {return key;} - - PSSamplerSelector() : key(0) {} - PSSamplerSelector(uint32 k) : key(k) {} - }; - - struct OMDepthStencilSelector - { - union - { - struct - { - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - - uint32 _free:28; - }; - - uint32 key; - }; - - // FIXME is the & useful ? - operator uint32() {return key;} - - OMDepthStencilSelector() : key(0) {} - OMDepthStencilSelector(uint32 k) : key(k) {} - }; - - struct OMColorMaskSelector - { - union - { - struct - { - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; - - uint32 _free:28; - }; - - struct - { - uint32 wrgba:4; - }; - - uint32 key; - }; - - // FIXME is the & useful ? - operator uint32() {return key & 0xf;} - - OMColorMaskSelector() : key(0xF) {} - OMColorMaskSelector(uint32 c) { wrgba = c; } - }; - - struct OGLBlend {uint16 bogus, op, src, dst;}; - static const OGLBlend m_blendMapOGL[3*3*3*3 + 1]; - static const int m_NO_BLEND; - static const int m_MERGE_BLEND; - - static int s_n; - - private: - uint32 m_msaa; // Level of Msaa - - static bool m_debug_gl_call; - static FILE* m_debug_gl_file; - - GSWnd* m_window; - - GLuint m_fbo; // frame buffer container - GLuint m_fbo_read; // frame buffer container only for reading - - GSVertexBufferStateOGL* m_va;// state of the vertex buffer/array - - struct { - GLuint ps[2]; // program object - GSUniformBufferOGL* cb; // uniform buffer object - } m_merge_obj; - - struct { - GLuint ps[4]; // program object - GSUniformBufferOGL* cb; // uniform buffer object - } m_interlace; - - struct { - GLuint vs; // program object - GLuint ps[18]; // program object - GLuint ln; // sampler object - GLuint pt; // sampler object - GSDepthStencilOGL* dss; - GSDepthStencilOGL* dss_write; - GSUniformBufferOGL* cb; - 
} m_convert; - - struct { - GLuint ps; - GSUniformBufferOGL *cb; - } m_fxaa; - - struct { - GLuint ps; - GSUniformBufferOGL* cb; - } m_shaderfx; - - struct { - GSDepthStencilOGL* dss; - GSTexture* t; - } m_date; - - struct { - GLuint ps; - GSUniformBufferOGL *cb; - } m_shadeboost; - - GLuint m_vs[1<<3]; - GLuint m_gs[1<<2]; - GLuint m_ps_ss[1<<4]; - GSDepthStencilOGL* m_om_dss[1<<4]; - hash_map m_ps; - GLuint m_apitrace; - - GLuint m_palette_ss; - - GSUniformBufferOGL* m_vs_cb; - GSUniformBufferOGL* m_ps_cb; - - VSConstantBuffer m_vs_cb_cache; - PSConstantBuffer m_ps_cb_cache; - - GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format); - GSTexture* FetchSurface(int type, int w, int h, bool msaa, int format); - - void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) final; - void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0) final; - void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex) final; - void DoExternalFX(GSTexture* sTex, GSTexture* dTex) final; - - void OMAttachRt(GSTextureOGL* rt = NULL); - void OMAttachDs(GSTextureOGL* ds = NULL); - void OMSetFBO(GLuint fbo); - - public: - GSShaderOGL* m_shader; - - GSDeviceOGL(); - virtual ~GSDeviceOGL(); - - static void CheckDebugLog(); - // Used by OpenGL, so the same calling convention is required. 
- static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar *gl_message, const void* userParam); - - bool HasStencil() { return true; } - bool HasDepth32() { return true; } - - bool Create(GSWnd* wnd); - bool Reset(int w, int h); - void Flip(); - void SetVSync(bool enable); - - void DrawPrimitive() final; - void DrawPrimitive(int offset, int count); - void DrawIndexedPrimitive() final; - void DrawIndexedPrimitive(int offset, int count) final; - inline void BeforeDraw(); - inline void AfterDraw(); - - void ClearRenderTarget(GSTexture* t, const GSVector4& c) final; - void ClearRenderTarget(GSTexture* t, uint32 c) final; - void ClearRenderTarget_i(GSTexture* t, int32 c); - void ClearDepth(GSTexture* t, float c) final; - void ClearStencil(GSTexture* t, uint8 c) final; - - GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0) final; - GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0) final; - GSTexture* CreateTexture(int w, int h, int format = 0) final; - GSTexture* CreateOffscreen(int w, int h, int format = 0) final; - void InitPrimDateTexture(GSTexture* rt); - void RecycleDateTexture(); - - GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) final; - - void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) final; - void CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true) final; - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, bool linear = true); - - void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* 
vertices, bool datm); - - void BeginScene() final {} - void EndScene() final; - - void IASetPrimitiveTopology(GLenum topology); - void IASetVertexBuffer(const void* vertices, size_t count); - void IASetIndexBuffer(const void* index, size_t count); - - void PSSetShaderResource(int i, GSTexture* sr) final; - void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) final; - void PSSetSamplerState(GLuint ss); - - void OMSetDepthStencilState(GSDepthStencilOGL* dss); - void OMSetBlendState(uint8 blend_index = 0, uint8 blend_factor = 0, bool is_blend_constant = false); - void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final; - void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector()); - - - void CreateTextureFX(); - GLuint CompileVS(VSSelector sel, int logz); - GLuint CompileGS(GSSelector sel); - GLuint CompilePS(PSSelector sel); - GLuint CreateSampler(bool bilinear, bool tau, bool tav, bool aniso = false); - GLuint CreateSampler(PSSamplerSelector sel); - GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); - - void SelfShaderTest(); - - - void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); - void SetupVS(VSSelector sel); - void SetupGS(GSSelector sel); - void SetupPS(PSSelector sel); - void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb); - void SetupSampler(PSSamplerSelector ssel); - void SetupOM(OMDepthStencilSelector dssel); - GLuint GetSamplerID(PSSamplerSelector ssel); - GLuint GetPaletteSamplerID(); - - void Barrier(GLbitfield b); -}; diff --git a/plugins/GSdx_legacy/GSDeviceSW.cpp b/plugins/GSdx_legacy/GSDeviceSW.cpp deleted file mode 100644 index 5518e5eecd..0000000000 --- a/plugins/GSdx_legacy/GSDeviceSW.cpp +++ /dev/null @@ -1,436 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General 
Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDeviceSW.h" - -GSDeviceSW::GSDeviceSW() -{ -} - -bool GSDeviceSW::Create(GSWnd* wnd) -{ - if(!GSDevice::Create(wnd)) - return false; - - Reset(1, 1); - - return true; -} - -bool GSDeviceSW::Reset(int w, int h) -{ - if(!GSDevice::Reset(w, h)) - return false; - - // TODO: m_backbuffer should be a window wrapper, or some native bitmap, software-only StretchRect to a full screen window may be too slow - - m_backbuffer = new GSTextureSW(GSTexture::RenderTarget, w, h); - - return true; -} - -GSTexture* GSDeviceSW::CreateSurface(int type, int w, int h, bool msaa, int format) -{ - if(format != 0) return NULL; // there is only one format - - return new GSTextureSW(type, w, h); -} - -void GSDeviceSW::BeginScene() -{ - // TODO -} - -void GSDeviceSW::DrawPrimitive() -{ - // TODO -} - -void GSDeviceSW::EndScene() -{ - // TODO -} - -void GSDeviceSW::ClearRenderTarget(GSTexture* t, const GSVector4& c) -{ - Clear(t, (c * 255 + 0.5f).rgba32()); -} - -void GSDeviceSW::ClearRenderTarget(GSTexture* t, uint32 c) -{ - Clear(t, c); -} - -void GSDeviceSW::ClearDepth(GSTexture* t, float c) -{ - Clear(t, *(uint32*)&c); -} - -void GSDeviceSW::ClearStencil(GSTexture* t, uint8 c) -{ - Clear(t, c); -} - -GSTexture* GSDeviceSW::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int 
format, int ps_shader) -{ - GSTexture* dst = CreateOffscreen(w, h, format); - - if(dst != NULL) - { - CopyRect(src, dst, GSVector4i(0, 0, w, h)); - } - - return dst; -} - -void GSDeviceSW::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) -{ - GSTexture::GSMap m; - - if(sTex->Map(m, &r)) - { - dTex->Update(r, m.bits, m.pitch); - - sTex->Unmap(); - } -} - -class ShaderBase -{ -protected: - GSVector4i Sample(const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const - { - GSVector4i c0 = c.upl8(); - GSVector4i c1 = c.uph8(); - - c0 = c0.lerp16<0>(c1, vf); - c0 = c0.lerp16<0>(c0.srl<8>(), uf); - - return c0; - } - - GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1) const - { - return c0.lerp16<0>(c1, c1.wwwwl().sll16(7)); - } - - GSVector4i Blend2x(const GSVector4i& c0, const GSVector4i& c1) const - { - return c0.lerp16<0>(c1, c1.wwwwl().sll16(1).pu16().uph8().sll16(7)); // .sll16(1).pu16() => 2x, then clamp (...) - } - - GSVector4i Blend(const GSVector4i& c0, const GSVector4i& c1, const GSVector4i& f) const - { - return c0.lerp16<0>(c1, f); - } -}; - -class ShaderCopy : public ShaderBase -{ -public: - void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const - { - *dst = Sample(c, uf, vf).pu16().extract32<0>(); - } - - void operator() (uint32* RESTRICT dst, uint32 c) const - { - *dst = c; - } -}; - -class ShaderAlphaBlend : public ShaderBase -{ -public: - void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const - { - *dst = Blend(Sample(c, uf, vf), GSVector4i(*dst).uph8()).pu16().extract32<0>(); - } - - void operator() (uint32* RESTRICT dst, uint32 c) const - { - *dst = Blend(GSVector4i(c), GSVector4i(*dst).uph8()).pu16().extract32<0>(); - } -}; - -class ShaderAlpha2xBlend : public ShaderBase -{ -public: - void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const - { - *dst = 
Blend2x(Sample(c, uf, vf), GSVector4i(*dst).uph8()).pu16().extract32<0>(); - } - - void operator() (uint32* RESTRICT dst, uint32 c) const - { - *dst = Blend2x(GSVector4i(c), GSVector4i(*dst).uph8()).pu16().extract32<0>(); - } -}; - -__aligned(class, 16) ShaderFactorBlend : public ShaderBase -{ - GSVector4i m_f; - -public: - ShaderFactorBlend(uint32 f) - { - m_f = GSVector4i((f << 16) | f).xxxx().srl16(1); - } - - void operator() (uint32* RESTRICT dst, const GSVector4i& c, const GSVector4i& uf, const GSVector4i& vf) const - { - *dst = Blend(Sample(c, uf, vf), GSVector4i(*dst).uph8(), m_f).pu16().extract32<0>(); - } - - void operator() (uint32* RESTRICT dst, uint32 c) const - { - *dst = Blend(GSVector4i(c), GSVector4i(*dst).uph8(), m_f).pu16().extract32<0>(); - } -}; - -template static void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const SHADER& shader, bool linear) -{ - GSVector4i r(dRect.ceil()); - - r = r.rintersect(GSVector4i(dTex->GetSize()).zwxy()); - - if(r.rempty()) return; - - GSTexture::GSMap dm; - - if(!dTex->Map(dm, &r)) return; - - GSTexture::GSMap sm; - - if(!sTex->Map(sm, NULL)) {dTex->Unmap(); return;} - - GSVector2i ssize = sTex->GetSize(); - - GSVector4 p = dRect; - GSVector4 t = sRect * GSVector4(ssize).xyxy() * GSVector4((float)0x10000); - - GSVector4 tl = p.xyxy(t); - GSVector4 br = p.zwzw(t); - GSVector4 tlbr = br - tl; - - tlbr /= tlbr.xyxy(); - - if(tl.x < (float)r.left) tl.z += tlbr.z * ((float)r.left - tl.x); - if(tl.y < (float)r.top) tl.w += tlbr.w * ((float)r.top - tl.y); - - GSVector4i uvdudv(tl.zwzw(tlbr)); - - GSVector4i uv = uvdudv.xxyy() + GSVector4i(0, 0x10000).xyxy(); - GSVector4i du = uvdudv.zzzz().srl<8>(); - GSVector4i dv = uvdudv.wwww().sll<8>(); - - // TODO: clipping may not be that necessary knowing we don't address outside (except the linear filter +1 pixel) - - GSVector4i uvmax = GSVector4i((ssize.x - 1) << 16, (ssize.y - 1) << 16).xxyy(); - - GSVector4i v = uv; - - 
if(linear) - { - for(int j = r.height(); j > 0; j--, v += dv, dm.bits += dm.pitch) - { - GSVector4i vf = v.zzwwh().zzww().srl16(1); - GSVector4i vi = v.max_i16(GSVector4i::zero()).min_i16(uvmax); - - int v0 = vi.extract16<5>(); - int v1 = vi.extract16<7>(); - - uint32* RESTRICT src0 = (uint32*)&sm.bits[v0 * sm.pitch]; - uint32* RESTRICT src1 = (uint32*)&sm.bits[v1 * sm.pitch]; - uint32* RESTRICT dst = (uint32*)dm.bits; - - GSVector4i u = v; - - for(int i = r.width(); i > 0; i--, dst++, u += du) - { - GSVector4i uf = u.xxyyh().xxyy().srl16(1); - GSVector4i ui = u.max_i16(GSVector4i::zero()).min_i16(uvmax); - - int u0 = ui.extract16<1>(); - int u1 = ui.extract16<3>(); - - shader(dst, GSVector4i(src0[u0], src0[u1], src1[u0], src1[u1]), uf, vf); - } - } - } - else - { - for(int j = r.height(); j > 0; j--, v += dv, dm.bits += dm.pitch) - { - GSVector4i vi = v.max_i16(GSVector4i::zero()).min_i16(uvmax); - - uint32* RESTRICT src = (uint32*)&sm.bits[vi.extract16<5>() * sm.pitch]; - uint32* RESTRICT dst = (uint32*)dm.bits; - - GSVector4i u = v; - - for(int i = r.width(); i > 0; i--, dst++, u += du) - { - GSVector4i ui = u.max_i16(GSVector4i::zero()).min_i16(uvmax); - - shader(dst, src[ui.extract16<1>()]); - } - } - } - - sTex->Unmap(); - dTex->Unmap(); -} - -void GSDeviceSW::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) -{ - // TODO: if dTex == m_backbuffer && m_backbuffer is special - - if(shader == 0) - { - if((sRect == GSVector4(0, 0, 1, 1) & dRect == GSVector4(dTex->GetSize()).zwxy()).alltrue() && sTex->GetSize() == dTex->GetSize()) - { - // shortcut - - CopyRect(sTex, dTex, GSVector4i(dTex->GetSize()).zwxy()); - - return; - } - - ShaderCopy s; - - ::StretchRect(sTex, sRect, dTex, dRect, s, linear); - } - else if(shader == 1) - { - ShaderAlphaBlend s; - - ::StretchRect(sTex, sRect, dTex, dRect, s, linear); - } - else - { - ASSERT(0); - } -} - -void GSDeviceSW::PSSetShaderResources(GSTexture* sr0, 
GSTexture* sr1) -{ - // TODO -} - -void GSDeviceSW::PSSetShaderResource(int i, GSTexture* sRect) -{ - // TODO -} - -void GSDeviceSW::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) -{ - // TODO -} - -// - -void GSDeviceSW::DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c) -{ - ClearRenderTarget(dTex, c); - - if(sTex[1] && !slbg) - { - StretchRect(sTex[1], sRect[1], dTex, dRect[1]); - } - - if(sTex[0]) - { - if(mmod == 0) - { - // alpha = min(sTex[0].a * 2, 1) - - ShaderAlpha2xBlend s; - - ::StretchRect(sTex[0], sRect[0], dTex, dRect[0], s, true); - } - else - { - // alpha = c.a - - ShaderFactorBlend s((uint32)(int)(c.a * 255)); - - ::StretchRect(sTex[0], sRect[0], dTex, dRect[0], s, true); - } - } - - // dTex->Save("c:\\1.bmp"); -} - -void GSDeviceSW::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) -{ - GSVector4 s = GSVector4(dTex->GetSize()); - - GSVector4 sRect(0, 0, 1, 1); - GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); - - if(shader == 0 || shader == 1) - { - // TODO: 0/1 => update even/odd lines of dTex - } - else if(shader == 2) - { - // TODO: blend lines (1:2:1 filter) - } - else if(shader == 3) - { - StretchRect(sTex, sRect, dTex, dRect, 0, linear); - } - else - { - ASSERT(0); - } -} - -void GSDeviceSW::Clear(GSTexture* t, uint32 c) -{ - int w = t->GetWidth(); - int h = t->GetHeight(); - - GSTexture::GSMap m; - - if(t->Map(m, NULL)) - { - GSVector4i v((int)c); - - w >>= 2; - - for(int j = 0; j < h; j++, m.bits += m.pitch) - { - GSVector4i* RESTRICT dst = (GSVector4i*)m.bits; - - for(int i = 0; i < w; i += 2) - { - dst[i + 0] = v; - dst[i + 1] = v; - } - } - - t->Unmap(); - } -} - diff --git a/plugins/GSdx_legacy/GSDeviceSW.h b/plugins/GSdx_legacy/GSDeviceSW.h deleted file mode 100644 index 2488ec974e..0000000000 --- a/plugins/GSdx_legacy/GSDeviceSW.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2007-2009 
Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDevice.h" -#include "GSTextureSW.h" - -class GSDeviceSW : public GSDevice -{ - GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format); - - void DoMerge(GSTexture* sTex[2], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, bool slbg, bool mmod, const GSVector4& c); - void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0); - - void Clear(GSTexture* t, uint32 c); - -public: - GSDeviceSW(); - - bool Create(GSWnd* wnd); - bool Reset(int w, int h); - - // drawing may be routed through here, the software renderers use the rasterizer directly now - - void BeginScene(); - void DrawPrimitive(); - void EndScene(); - - void ClearRenderTarget(GSTexture* t, const GSVector4& c); - void ClearRenderTarget(GSTexture* t, uint32 c); - void ClearDepth(GSTexture* t, float c); - void ClearStencil(GSTexture* t, uint8 c); - - GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0); - - void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const 
GSVector4& dRect, int shader = 0, bool linear = true); - - void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1); - void PSSetShaderResource(int i, GSTexture* sRect); - void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); -}; - diff --git a/plugins/GSdx_legacy/GSDialog.cpp b/plugins/GSdx_legacy/GSDialog.cpp deleted file mode 100644 index 7c55d67570..0000000000 --- a/plugins/GSdx_legacy/GSDialog.cpp +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "StdAfx.h" -#include -#include -#include "GSdx.h" -#include "GSDialog.h" -#include "GSVector.h" - -GSDialog::GSDialog(UINT id) - : m_id(id) - , m_hWnd(NULL) -{ -} - -INT_PTR GSDialog::DoModal() -{ - return DialogBoxParam(theApp.GetModuleHandle(), MAKEINTRESOURCE(m_id), GetActiveWindow(), DialogProc, (LPARAM)this); -} - -INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) -{ - GSDialog* dlg = NULL; - - if(message == WM_INITDIALOG) - { - dlg = (GSDialog*)lParam; - SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)dlg); - dlg->m_hWnd = hWnd; - - MONITORINFO mi; - mi.cbSize = sizeof(mi); - GetMonitorInfo(MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST), &mi); - - GSVector4i r; - GetWindowRect(hWnd, r); - - int x = (mi.rcWork.left + mi.rcWork.right - r.width()) / 2; - int y = (mi.rcWork.top + mi.rcWork.bottom - r.height()) / 2; - - SetWindowPos(hWnd, NULL, x, y, -1, -1, SWP_NOSIZE | SWP_NOZORDER | SWP_NOACTIVATE); - - dlg->OnInit(); - - return true; - } - - dlg = (GSDialog*)GetWindowLongPtr(hWnd, GWLP_USERDATA); - - if (message == WM_NOTIFY) - { - if (((LPNMHDR)lParam)->code == TTN_GETDISPINFO) - { - LPNMTTDISPINFO pInfo = (LPNMTTDISPINFO)lParam; - UINT id = GetWindowLongPtr((HWND)pInfo->hdr.idFrom, GWL_ID); - - // lpszText is used only if hinst is NULL. Seems to be NULL already, - // but it can't hurt to explicitly set it. - pInfo->hinst = NULL; - pInfo->lpszText = (LPTSTR)dialog_message(id); - SendMessage(pInfo->hdr.hwndFrom, TTM_SETMAXTIPWIDTH, 0, 500); - return true; - } - } - - return dlg != NULL ? dlg->OnMessage(message, wParam, lParam) : FALSE; -} - -// Tooltips will only show if the TOOLINFO cbSize <= the struct size. If it's -// smaller some functionality might be disabled. So let's try and use the -// correct size. 
-UINT GSDialog::GetTooltipStructSize() -{ - DLLGETVERSIONPROC dllGetVersion = (DLLGETVERSIONPROC)GetProcAddress(GetModuleHandle("ComCtl32.dll"), "DllGetVersion"); - if (dllGetVersion) { - DLLVERSIONINFO2 dllversion = { 0 }; - dllversion.info1.cbSize = sizeof(DLLVERSIONINFO2); - - if (dllGetVersion((DLLVERSIONINFO*)&dllversion) == S_OK) { - // Minor, then major version. - DWORD version = MAKELONG(dllversion.info1.dwMinorVersion, dllversion.info1.dwMajorVersion); - DWORD tooltip_v3 = MAKELONG(0, 6); - if (version >= tooltip_v3) - return TTTOOLINFOA_V3_SIZE; - } - } - // Should be fine for XP and onwards, comctl versions >= 4.7 should at least - // be this size. - return TTTOOLINFOA_V2_SIZE; -} - -bool GSDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) -{ - return message == WM_COMMAND ? OnCommand((HWND)lParam, LOWORD(wParam), HIWORD(wParam)) : false; -} - -bool GSDialog::OnCommand(HWND hWnd, UINT id, UINT code) -{ - if(id == IDOK || id == IDCANCEL) - { - EndDialog(m_hWnd, id); - - return true; - } - - return false; -} - -string GSDialog::GetText(UINT id) -{ - string s; - - char* buff = NULL; - - for(int size = 256, limit = 65536; size < limit; size <<= 1) - { - buff = new char[size]; - - if(GetDlgItemText(m_hWnd, id, buff, size)) - { - s = buff; - size = limit; - } - - delete [] buff; - } - - return s; -} - -int GSDialog::GetTextAsInt(UINT id) -{ - return atoi(GetText(id).c_str()); -} - -void GSDialog::SetText(UINT id, const char* str) -{ - SetDlgItemText(m_hWnd, id, str); -} - -void GSDialog::SetTextAsInt(UINT id, int i) -{ - char buff[32] = {0}; - itoa(i, buff, 10); - SetText(id, buff); -} - -void GSDialog::ComboBoxInit(UINT id, const vector& settings, int32_t selectionValue, int32_t maxValue) -{ - HWND hWnd = GetDlgItem(m_hWnd, id); - - SendMessage(hWnd, CB_RESETCONTENT, 0, 0); - - for(size_t i = 0; i < settings.size(); i++) - { - const GSSetting& s = settings[i]; - - if(s.value <= maxValue) - { - string str(s.name); - - if(!s.note.empty()) - { - str 
= str + " (" + s.note + ")"; - } - - ComboBoxAppend(id, str.c_str(), (LPARAM)s.value, s.value == selectionValue); - } - } - - ComboBoxFixDroppedWidth(id); -} - -int GSDialog::ComboBoxAppend(UINT id, const char* str, LPARAM data, bool select) -{ - HWND hWnd = GetDlgItem(m_hWnd, id); - - int item = (int)SendMessage(hWnd, CB_ADDSTRING, 0, (LPARAM)str); - - SendMessage(hWnd, CB_SETITEMDATA, item, (LPARAM)data); - - if(select) - { - SendMessage(hWnd, CB_SETCURSEL, item, 0); - } - - return item; -} - -bool GSDialog::ComboBoxGetSelData(UINT id, INT_PTR& data) -{ - HWND hWnd = GetDlgItem(m_hWnd, id); - - int item = SendMessage(hWnd, CB_GETCURSEL, 0, 0); - - if(item >= 0) - { - data = SendMessage(hWnd, CB_GETITEMDATA, item, 0); - - return true; - } - - return false; -} - -void GSDialog::ComboBoxFixDroppedWidth(UINT id) -{ - HWND hWnd = GetDlgItem(m_hWnd, id); - - int count = (int)SendMessage(hWnd, CB_GETCOUNT, 0, 0); - - if(count > 0) - { - HDC hDC = GetDC(hWnd); - - SelectObject(hDC, (HFONT)SendMessage(hWnd, WM_GETFONT, 0, 0)); - - int width = (int)SendMessage(hWnd, CB_GETDROPPEDWIDTH, 0, 0); - - for(int i = 0; i < count; i++) - { - int len = (int)SendMessage(hWnd, CB_GETLBTEXTLEN, i, 0); - - if(len > 0) - { - char* buff = new char[len + 1]; - - SendMessage(hWnd, CB_GETLBTEXT, i, (LPARAM)buff); - - SIZE size; - - if(GetTextExtentPoint32(hDC, buff, strlen(buff), &size)) - { - size.cx += 10; - - if(size.cx > width) width = size.cx; - } - - delete [] buff; - } - } - - ReleaseDC(hWnd, hDC); - - if(width > 0) - { - SendMessage(hWnd, CB_SETDROPPEDWIDTH, width, 0); - } - } -} - -void GSDialog::OpenFileDialog(UINT id, const char *title) -{ - char filename[512]; - OPENFILENAME ofn = { 0 }; - ofn.lStructSize = sizeof(OPENFILENAME); - ofn.hwndOwner = m_hWnd; - ofn.Flags = OFN_EXPLORER | OFN_FILEMUSTEXIST; - ofn.lpstrFile = filename; - ofn.lpstrFile[0] = 0; - ofn.nMaxFile = 512; - ofn.lpstrTitle = title; - - // GetOpenFileName changes the current directory, so we need to save and - // 
restore the current directory or everything using relative paths will - // break. - char current_directory[512]; - GetCurrentDirectory(512, current_directory); - - if (GetOpenFileName(&ofn)) - SendMessage(GetDlgItem(m_hWnd, id), WM_SETTEXT, 0, (LPARAM)filename); - - SetCurrentDirectory(current_directory); - -} - -void GSDialog::AddTooltip(UINT id) -{ - static UINT tooltipStructSize = GetTooltipStructSize(); - bool hasTooltip; - - dialog_message(id, &hasTooltip); - if (!hasTooltip) - return; - - HWND hWnd = GetDlgItem(m_hWnd, id); - if (hWnd == NULL) - return; - - // TTS_NOPREFIX allows tabs and '&' to be used. - HWND hwndTip = CreateWindowEx(WS_EX_TOPMOST, TOOLTIPS_CLASS, NULL, - TTS_ALWAYSTIP | TTS_NOPREFIX, - CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, - m_hWnd, NULL, theApp.GetModuleHandle(), NULL); - if (hwndTip == NULL) - return; - - TOOLINFO toolInfo = { 0 }; - toolInfo.cbSize = tooltipStructSize; - toolInfo.hwnd = m_hWnd; - toolInfo.uFlags = TTF_IDISHWND | TTF_SUBCLASS; - toolInfo.uId = (UINT_PTR)hWnd; - // Can't directly add the tooltip string - it doesn't work for long messages - toolInfo.lpszText = LPSTR_TEXTCALLBACK; - SendMessage(hwndTip, TTM_ADDTOOL, 0, (LPARAM)&toolInfo); - // 32.767s is the max show time. 
- SendMessage(hwndTip, TTM_SETDELAYTIME, TTDT_AUTOPOP, 32767); -} - -void GSDialog::InitCommonControls() -{ - INITCOMMONCONTROLSEX icex; - icex.dwSize = sizeof(INITCOMMONCONTROLSEX); - icex.dwICC = ICC_TAB_CLASSES; - - InitCommonControlsEx(&icex); -} diff --git a/plugins/GSdx_legacy/GSDialog.h b/plugins/GSdx_legacy/GSDialog.h deleted file mode 100644 index 905db3e9df..0000000000 --- a/plugins/GSdx_legacy/GSDialog.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSSetting.h" - -class GSDialog -{ - int m_id; - - static INT_PTR CALLBACK DialogProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); - static UINT GetTooltipStructSize(); - -protected: - HWND m_hWnd; - - virtual void OnInit() {} - virtual bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); - virtual bool OnCommand(HWND hWnd, UINT id, UINT code); - -public: - GSDialog(UINT id); - virtual ~GSDialog() {} - - int GetId() const {return m_id;} - - INT_PTR DoModal(); - - string GetText(UINT id); - int GetTextAsInt(UINT id); - - void SetText(UINT id, const char* str); - void SetTextAsInt(UINT id, int i); - - void ComboBoxInit(UINT id, const vector& settings, int32_t selectionValue, int32_t maxValue = INT32_MAX); - int ComboBoxAppend(UINT id, const char* str, LPARAM data = 0, bool select = false); - bool ComboBoxGetSelData(UINT id, INT_PTR& data); - void ComboBoxFixDroppedWidth(UINT id); - - void OpenFileDialog(UINT id, const char *title); - - void AddTooltip(UINT id); - - static void InitCommonControls(); -}; diff --git a/plugins/GSdx_legacy/GSDirtyRect.cpp b/plugins/GSdx_legacy/GSDirtyRect.cpp deleted file mode 100644 index e9efc0d10b..0000000000 --- a/plugins/GSdx_legacy/GSDirtyRect.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDirtyRect.h" - -GSDirtyRect::GSDirtyRect() - : psm(PSM_PSMCT32) -{ - left = top = right = bottom = 0; -} - -GSDirtyRect::GSDirtyRect(const GSVector4i& r, uint32 psm) - : psm(psm) -{ - left = r.left; - top = r.top; - right = r.right; - bottom = r.bottom; -} - -GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) -{ - GSVector4i r; - - GSVector2i src = GSLocalMemory::m_psm[psm].bs; - - if(psm != TEX0.PSM) - { - GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs; - - r.left = left * dst.x / src.x; - r.top = top * dst.y / src.y; - r.right = right * dst.x / src.x; - r.bottom = bottom * dst.y / src.y; - } - else - { - r = GSVector4i(left, top, right, bottom).ralign(src); - } - - return r; -} - -// - -GSVector4i GSDirtyRectList::GetDirtyRectAndClear(const GIFRegTEX0& TEX0, const GSVector2i& size) -{ - if(!empty()) - { - GSVector4i r(INT_MAX, INT_MAX, 0, 0); - - for(list::iterator i = begin(); i != end(); i++) - { - r = r.runion(i->GetDirtyRect(TEX0)); - } - - clear(); - - GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs; - - return r.ralign(bs).rintersect(GSVector4i(0, 0, size.x, size.y)); - } - - return GSVector4i::zero(); -} diff --git a/plugins/GSdx_legacy/GSDirtyRect.h b/plugins/GSdx_legacy/GSDirtyRect.h deleted file mode 100644 index e2468b6f4e..0000000000 --- a/plugins/GSdx_legacy/GSDirtyRect.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSLocalMemory.h" - -class GSDirtyRect -{ - int left; - int top; - int right; - int bottom; - - uint32 psm; - -public: - GSDirtyRect(); - GSDirtyRect(const GSVector4i& r, uint32 psm); - GSVector4i GetDirtyRect(const GIFRegTEX0& TEX0); -}; - -class GSDirtyRectList : public list -{ -public: - GSDirtyRectList() {} - GSVector4i GetDirtyRectAndClear(const GIFRegTEX0& TEX0, const GSVector2i& size); -}; diff --git a/plugins/GSdx_legacy/GSDrawScanline.cpp b/plugins/GSdx_legacy/GSDrawScanline.cpp deleted file mode 100644 index 3cf739d071..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanline.cpp +++ /dev/null @@ -1,2965 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanline.h" -#include "GSTextureCacheSW.h" - -GSDrawScanline::GSDrawScanline() - : m_sp_map("GSSetupPrim", &m_local) - , m_ds_map("GSDrawScanline", &m_local) -{ - memset(&m_local, 0, sizeof(m_local)); - - m_local.gd = &m_global; -} - -GSDrawScanline::~GSDrawScanline() -{ -} - -void GSDrawScanline::BeginDraw(const GSRasterizerData* data) -{ - memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global)); - - if(m_global.sel.mmin && m_global.sel.lcm) - { - GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.extract32<0>());//.x); - - v = v.upl16(v); - - m_local.temp.uv_minmax[0] = v.upl32(v); - m_local.temp.uv_minmax[1] = v.uph32(v); - } - - m_ds = m_ds_map[m_global.sel]; - - if(m_global.sel.aa1) - { - GSScanlineSelector sel; - - sel.key = m_global.sel.key; - sel.zwrite = 0; - sel.edge = 1; - - m_de = m_ds_map[sel]; - } - else - { - m_de = NULL; - } - - if(m_global.sel.IsSolidRect()) - { - m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect; - } - else - { - m_dr = NULL; - } - - // doesn't need all bits => less functions generated - - GSScanlineSelector sel; - - sel.key = 0; - - sel.iip = m_global.sel.iip; - sel.tfx = m_global.sel.tfx; - sel.tcc = m_global.sel.tcc; - sel.fst = m_global.sel.fst; - sel.fge = m_global.sel.fge; - sel.prim = m_global.sel.prim; - sel.fb = m_global.sel.fb; - sel.zb = m_global.sel.zb; - sel.zoverflow = m_global.sel.zoverflow; - sel.notest = m_global.sel.notest; - - m_sp = m_sp_map[sel]; -} - -void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total) -{ - m_ds_map.UpdateStats(frame, ticks, actual, total); -} - -#ifndef ENABLE_JIT_RASTERIZER - -void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) -{ - GSScanlineSelector sel = m_global.sel; - - bool has_z = 
sel.zb != 0; - bool has_f = sel.fb && sel.fge; - bool has_t = sel.fb && sel.tfx != TFX_NONE; - bool has_c = sel.fb && !(sel.tfx == TFX_DECAL && sel.tcc); - - #if _M_SSE >= 0x501 - - const GSVector8* shift = GSSetupPrimCodeGenerator::m_shift; - - if(has_z || has_f) - { - if(sel.prim != GS_SPRITE_CLASS) - { - GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); - - if(has_f) - { - m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); - - GSVector8 df = GSVector8::broadcast32(&dscan.p.w); - - for(int i = 0; i < 8; i++) - { - m_local.d[i].f = GSVector8i(df * shift[1 + i]).xxzzlh(); - } - } - - if(has_z) - { - m_local.d8.p.z = dp8.extract32<2>(); - - GSVector8 dz = GSVector8::broadcast32(&dscan.p.z); - - for(int i = 0; i < 8; i++) - { - m_local.d[i].z = dz * shift[1 + i]; - } - } - } - else - { - if(has_f) - { - m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); - } - - if(has_z) - { - m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w - } - } - } - - if(has_t) - { - GSVector4 dt8 = dscan.t * GSVector4::broadcast32(&shift[0]); - - if(sel.fst) - { - m_local.d8.stq = GSVector4::cast(GSVector4i(dt8)); - } - else - { - m_local.d8.stq = dt8; - } - - GSVector8 dt(dscan.t); - - for(int j = 0, k = sel.fst ? 
2 : 3; j < k; j++) - { - GSVector8 dstq; - - switch(j) - { - case 0: dstq = dt.xxxx(); break; - case 1: dstq = dt.yyyy(); break; - case 2: dstq = dt.zzzz(); break; - } - - for(int i = 0; i < 8; i++) - { - GSVector8 v = dstq * shift[1 + i]; - - if(sel.fst) - { - switch(j) - { - case 0: m_local.d[i].s = GSVector8::cast(GSVector8i(v)); break; - case 1: m_local.d[i].t = GSVector8::cast(GSVector8i(v)); break; - } - } - else - { - switch(j) - { - case 0: m_local.d[i].s = v; break; - case 1: m_local.d[i].t = v; break; - case 2: m_local.d[i].q = v; break; - } - } - } - } - } - - if(has_c) - { - if(sel.iip) - { - GSVector4 dc8 = dscan.c * GSVector4::broadcast32(&shift[0]); - - GSVector4i::storel(&m_local.d8.c, GSVector4i(dc8).xzyw().ps32()); - - GSVector8 dc(dscan.c); - - GSVector8 dr = dc.xxxx(); - GSVector8 db = dc.zzzz(); - - for(int i = 0; i < 8; i++) - { - GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); - GSVector8i b = GSVector8i(db * shift[1 + i]).ps32(); - - m_local.d[i].rb = r.upl16(b); - } - - GSVector8 dg = dc.yyyy(); - GSVector8 da = dc.wwww(); - - for(int i = 0; i < 8; i++) - { - GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); - GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); - - m_local.d[i].ga = g.upl16(a); - } - } - else - { - int last = 0; - - switch(sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - GSVector8i c = GSVector8i(GSVector8(vertex[index[last]].c)); - - c = c.upl16(c.zwxy()); - - if(sel.tfx == TFX_NONE) c = c.srl16(7); - - m_local.c.rb = c.xxxx(); - m_local.c.ga = c.zzzz(); - } - } - - #else - - const GSVector4* shift = GSSetupPrimCodeGenerator::m_shift; - - if(has_z || has_f) - { - if(sel.prim != GS_SPRITE_CLASS) - { - if(has_f) - { - GSVector4 df = dscan.p.wwww(); - - m_local.d4.f = GSVector4i(df * shift[0]).xxzzlh(); - - for(int i = 0; i < 4; i++) - { - m_local.d[i].f = GSVector4i(df * shift[1 + 
i]).xxzzlh(); - } - } - - if(has_z) - { - GSVector4 dz = dscan.p.zzzz(); - - m_local.d4.z = dz * shift[0]; - - for(int i = 0; i < 4; i++) - { - m_local.d[i].z = dz * shift[1 + i]; - } - } - } - else - { - if(has_f) - { - m_local.p.f = GSVector4i(vertex[index[1]].p).zzzzh().zzzz(); - } - - if(has_z) - { - m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w - } - } - } - - if(has_t) - { - GSVector4 t = dscan.t; - - if(sel.fst) - { - m_local.d4.stq = GSVector4::cast(GSVector4i(t * shift[0])); - } - else - { - m_local.d4.stq = t * shift[0]; - } - - for(int j = 0, k = sel.fst ? 2 : 3; j < k; j++) - { - GSVector4 dstq; - - switch(j) - { - case 0: dstq = t.xxxx(); break; - case 1: dstq = t.yyyy(); break; - case 2: dstq = t.zzzz(); break; - } - - for(int i = 0; i < 4; i++) - { - GSVector4 v = dstq * shift[1 + i]; - - if(sel.fst) - { - switch(j) - { - case 0: m_local.d[i].s = GSVector4::cast(GSVector4i(v)); break; - case 1: m_local.d[i].t = GSVector4::cast(GSVector4i(v)); break; - } - } - else - { - switch(j) - { - case 0: m_local.d[i].s = v; break; - case 1: m_local.d[i].t = v; break; - case 2: m_local.d[i].q = v; break; - } - } - } - } - } - - if(has_c) - { - if(sel.iip) - { - m_local.d4.c = GSVector4i(dscan.c * shift[0]).xzyw().ps32(); - - GSVector4 dr = dscan.c.xxxx(); - GSVector4 db = dscan.c.zzzz(); - - for(int i = 0; i < 4; i++) - { - GSVector4i r = GSVector4i(dr * shift[1 + i]).ps32(); - GSVector4i b = GSVector4i(db * shift[1 + i]).ps32(); - - m_local.d[i].rb = r.upl16(b); - } - - GSVector4 dg = dscan.c.yyyy(); - GSVector4 da = dscan.c.wwww(); - - for(int i = 0; i < 4; i++) - { - GSVector4i g = GSVector4i(dg * shift[1 + i]).ps32(); - GSVector4i a = GSVector4i(da * shift[1 + i]).ps32(); - - m_local.d[i].ga = g.upl16(a); - } - } - else - { - int last = 0; - - switch(sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - 
GSVector4i c = GSVector4i(vertex[index[last]].c); - - c = c.upl16(c.zwxy()); - - if(sel.tfx == TFX_NONE) c = c.srl16(7); - - m_local.c.rb = c.xxxx(); - m_local.c.ga = c.zzzz(); - } - } - - #endif -} - -void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) -{ - GSScanlineSelector sel = m_global.sel; - - #if _M_SSE >= 0x501 - - GSVector8i test; - GSVector8 zo; - GSVector8i f; - GSVector8 s, t, q; - GSVector8i uf, vf; - GSVector8i rbf, gaf; - GSVector8i cov; - - // Init - - int skip, steps; - - if(!sel.notest) - { - skip = left & 7; - steps = pixels + skip - 8; - left -= skip; - test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]); - } - else - { - skip = 0; - steps = pixels - 8; - } - - ASSERT((left & 7) == 0); - - const GSVector2i* fza_base = &m_global.fzbr[top]; - const GSVector2i* fza_offset = &m_global.fzbc[left >> 2]; - - if(sel.prim != GS_SPRITE_CLASS) - { - if(sel.fwrite && sel.fge) - { - f = GSVector8i::broadcast16(GSVector4i(scan.p).srl<12>()).add16(m_local.d[skip].f); - } - - if(sel.zb) - { - zo = m_local.d[skip].z; - } - } - - if(sel.fb) - { - if(sel.edge) - { - cov = GSVector8i::broadcast16(GSVector4i::cast(scan.t).srl<12>()).srl16(9); - } - - if(sel.tfx != TFX_NONE) - { - if(sel.fst) - { - GSVector4i vt(scan.t); - - GSVector8i u = GSVector8i::broadcast32(vt.xxxx()) + GSVector8i::cast(m_local.d[skip].s); - GSVector8i v = GSVector8i::broadcast32(vt.yyyy()); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) - { - v += GSVector8i::cast(m_local.d[skip].t); - } - else if(sel.ltf) - { - vf = v.xxzzlh().srl16(12); - } - - s = GSVector8::cast(u); - t = GSVector8::cast(v); - } - else - { - s = GSVector8::broadcast32(&scan.t.x) + m_local.d[skip].s; - t = GSVector8::broadcast32(&scan.t.y) + m_local.d[skip].t; - q = GSVector8::broadcast32(&scan.t.z) + m_local.d[skip].q; - } - } - - if(!(sel.tfx == TFX_DECAL && sel.tcc)) - { - 
if(sel.iip) - { - GSVector4i c(scan.c); - - c = c.upl16(c.zwxy()); - - rbf = GSVector8i::broadcast32(&c.x).add16(m_local.d[skip].rb); - gaf = GSVector8i::broadcast32(&c.z).add16(m_local.d[skip].ga); - } - else - { - rbf = m_local.c.rb; - gaf = m_local.c.ga; - } - } - } - - while(1) - { - do - { - int fa = 0, za = 0; - GSVector8i fd, zs, zd; - GSVector8i fm, zm; - GSVector8i rb, ga; - - // TestZ - - if(sel.zb) - { - za = fza_base->y + fza_offset->y; - - if(sel.prim != GS_SPRITE_CLASS) - { - GSVector8 z = GSVector8::broadcast32(&scan.p.z) + zo; - - if(sel.zoverflow) - { - zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001()); - } - else - { - zs = GSVector8i(z); - } - } - else - { - zs = GSVector8i::broadcast32(&m_local.p.z); - } - - if(sel.ztest) - { - zd = GSVector8i::load( - (uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16, - (uint8*)m_global.vm + za * 2 + 32, (uint8*)m_global.vm + za * 2 + 48); - - switch(sel.zpsm) - { - case 1: zd = zd.sll32(8).srl32(8); break; - case 2: zd = zd.sll32(16).srl32(16); break; - default: break; - } - - GSVector8i zso = zs; - GSVector8i zdo = zd; - - if(sel.zoverflow || sel.zpsm == 0) - { - zso -= GSVector8i::x80000000(); - zdo -= GSVector8i::x80000000(); - } - - switch(sel.ztst) - { - case ZTST_GEQUAL: test |= zso < zdo; break; - case ZTST_GREATER: test |= zso <= zdo; break; - } - - if(test.alltrue()) continue; - } - } - - // SampleTexture - - if(sel.fb && sel.tfx != TFX_NONE) - { - GSVector8i u, v, uv[2]; - GSVector8i lodi, lodf; - GSVector8i minuv, maxuv; - GSVector8i addr00, addr01, addr10, addr11; - GSVector8i c00, c01, c10, c11; - - if(sel.mmin) - { - if(!sel.fst) - { - GSVector8 qrcp = q.rcp(); - - u = GSVector8i(s * qrcp); - v = GSVector8i(t * qrcp); - } - else - { - u = GSVector8i::cast(s); - v = GSVector8i::cast(t); - } - - if(!sel.lcm) - { - GSVector8 tmp = q.log2(3) * m_global.l + m_global.k; // (-log2(Q) * (1 << L) + K) * 0x10000 - - GSVector8i lod = 
GSVector8i(tmp.sat(GSVector8::zero(), m_global.mxl), false); - - if(sel.mmin == 1) // round-off mode - { - lod += 0x8000; - } - - lodi = lod.srl32(16); - - if(sel.mmin == 2) // trilinear mode - { - lodf = lod.xxzzlh(); - } - - // shift u/v by (int)lod - - u = u.srav32(lodi); - v = v.srav32(lodi); - - uv[0] = u.srav32(lodi); - uv[1] = v.srav32(lodi); - - GSVector8i tmin = GSVector8i::broadcast128(m_global.t.min); - GSVector8i tminu = tmin.upl16().srlv32(lodi); - GSVector8i tminv = tmin.uph16().srlv32(lodi); - - GSVector8i tmax = GSVector8i::broadcast128(m_global.t.max); - GSVector8i tmaxu = tmax.upl16().srlv32(lodi); - GSVector8i tmaxv = tmax.uph16().srlv32(lodi); - - minuv = tminu.pu32(tminv); - maxuv = tmaxu.pu32(tmaxv); - } - else - { - lodi = m_global.lod.i; - - u = u.srav32(lodi); - v = v.srav32(lodi); - - uv[0] = u; - uv[1] = v; - - minuv = m_local.temp.uv_minmax[0]; - maxuv = m_local.temp.uv_minmax[1]; - } - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); - } - - GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector8i uv1 = uv0; - - { - GSVector8i repeat = (uv0 & minuv) | maxuv; - GSVector8i clamp = uv0.sat_i16(minuv, maxuv); - - uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector8i::x0001()); - - GSVector8i repeat = (uv1 & minuv) | maxuv; - GSVector8i clamp = uv1.sat_i16(minuv, maxuv); - - uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - GSVector8i y0 = uv0.uph16() << (sel.tw + 3); - GSVector8i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector8i y1 = uv1.uph16() << (sel.tw + 3); - GSVector8i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - for(int i = 0; i < 8; i++) - { - const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = m_global.clut[tex[addr00.u32[i]]]; - c01.u32[i] = 
m_global.clut[tex[addr01.u32[i]]]; - c10.u32[i] = m_global.clut[tex[addr10.u32[i]]]; - c11.u32[i] = m_global.clut[tex[addr11.u32[i]]]; - } - } - else - { - for(int i = 0; i < 8; i++) - { - const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = tex[addr00.u32[i]]; - c01.u32[i] = tex[addr01.u32[i]]; - c10.u32[i] = tex[addr10.u32[i]]; - c11.u32[i] = tex[addr11.u32[i]]; - } - } - - GSVector8i rb00 = c00.sll16(8).srl16(8); - GSVector8i ga00 = c00.srl16(8); - GSVector8i rb01 = c01.sll16(8).srl16(8); - GSVector8i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector8i rb10 = c10.sll16(8).srl16(8); - GSVector8i ga10 = c10.srl16(8); - GSVector8i rb11 = c11.sll16(8).srl16(8); - GSVector8i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb = rb00.lerp16_4(rb10, vf); - ga = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - for(int i = 0; i < 8; i++) - { - c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; - } - } - else - { - for(int i = 0; i < 8; i++) - { - c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; - } - } - - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); - } - - if(sel.mmin != 1) // !round-off mode - { - GSVector8i rb2, ga2; - - lodi += GSVector8i::x00000001(); - - u = uv[0].sra32(1); - v = uv[1].sra32(1); - - minuv = minuv.srl16(1); - maxuv = maxuv.srl16(1); - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); - } - - GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector8i uv1 = uv0; - - { - GSVector8i repeat = (uv0 & minuv) | maxuv; - GSVector8i clamp = uv0.sat_i16(minuv, maxuv); - - uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector8i::x0001()); - - GSVector8i repeat = (uv1 & minuv) | maxuv; - GSVector8i clamp = 
uv1.sat_i16(minuv, maxuv); - - uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - GSVector8i y0 = uv0.uph16() << (sel.tw + 3); - GSVector8i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector8i y1 = uv1.uph16() << (sel.tw + 3); - GSVector8i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - for(int i = 0; i < 8; i++) - { - const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = m_global.clut[tex[addr00.u32[i]]]; - c01.u32[i] = m_global.clut[tex[addr01.u32[i]]]; - c10.u32[i] = m_global.clut[tex[addr10.u32[i]]]; - c11.u32[i] = m_global.clut[tex[addr11.u32[i]]]; - } - } - else - { - for(int i = 0; i < 8; i++) - { - const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = tex[addr00.u32[i]]; - c01.u32[i] = tex[addr01.u32[i]]; - c10.u32[i] = tex[addr10.u32[i]]; - c11.u32[i] = tex[addr11.u32[i]]; - } - } - - GSVector8i rb00 = c00.sll16(8).srl16(8); - GSVector8i ga00 = c00.srl16(8); - GSVector8i rb01 = c01.sll16(8).srl16(8); - GSVector8i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector8i rb10 = c10.sll16(8).srl16(8); - GSVector8i ga10 = c10.srl16(8); - GSVector8i rb11 = c11.sll16(8).srl16(8); - GSVector8i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb2 = rb00.lerp16_4(rb10, vf); - ga2 = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - for(int i = 0; i < 8; i++) - { - c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; - } - } - else - { - for(int i = 0; i < 8; i++) - { - c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; - } - } - - rb2 = c00.sll16(8).srl16(8); - ga2 = c00.srl16(8); - } - - if(sel.lcm) lodf = m_global.lod.f; - - lodf = lodf.srl16(1); - - rb = rb.lerp16<0>(rb2, lodf); - ga = ga.lerp16<0>(ga2, lodf); - } - } - else - { - 
if(!sel.fst) - { - GSVector8 qrcp = q.rcp(); - - u = GSVector8i(s * qrcp); - v = GSVector8i(t * qrcp); - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - } - } - else - { - u = GSVector8i::cast(s); - v = GSVector8i::cast(t); - } - - if(sel.ltf) - { - uf = u.xxzzlh().srl16(12); - - if(sel.prim != GS_SPRITE_CLASS) - { - vf = v.xxzzlh().srl16(12); - } - } - - GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector8i uv1 = uv0; - - GSVector8i tmin = GSVector8i::broadcast128(m_global.t.min); - GSVector8i tmax = GSVector8i::broadcast128(m_global.t.max); - - { - GSVector8i repeat = (uv0 & tmin) | tmax; - GSVector8i clamp = uv0.sat_i16(tmin, tmax); - - uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector8i::x0001()); - - GSVector8i repeat = (uv1 & tmin) | tmax; - GSVector8i clamp = uv1.sat_i16(tmin, tmax); - - uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); - } - - GSVector8i y0 = uv0.uph16() << (sel.tw + 3); - GSVector8i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector8i y1 = uv1.uph16() << (sel.tw + 3); - GSVector8i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - const uint8* tex = (const uint8*)m_global.tex[0]; - - c00 = addr00.gather32_32(tex, m_global.clut); - c01 = addr01.gather32_32(tex, m_global.clut); - c10 = addr10.gather32_32(tex, m_global.clut); - c11 = addr11.gather32_32(tex, m_global.clut); - } - else - { - const uint32* tex = (const uint32*)m_global.tex[0]; - - c00 = addr00.gather32_32(tex); - c01 = addr01.gather32_32(tex); - c10 = addr10.gather32_32(tex); - c11 = addr11.gather32_32(tex); - } - - GSVector8i rb00 = c00.sll16(8).srl16(8); - GSVector8i ga00 = c00.srl16(8); - GSVector8i rb01 = c01.sll16(8).srl16(8); - GSVector8i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector8i rb10 = c10.sll16(8).srl16(8); - GSVector8i ga10 = c10.srl16(8); 
- GSVector8i rb11 = c11.sll16(8).srl16(8); - GSVector8i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb = rb00.lerp16_4(rb10, vf); - ga = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut); - } - else - { - c00 = addr00.gather32_32((const uint32*)m_global.tex[0]); - } - - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); - } - } - } - - // AlphaTFX - - if(sel.fb) - { - switch(sel.tfx) - { - case TFX_MODULATE: - ga = ga.modulate16<1>(gaf).clamp8(); - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_DECAL: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_HIGHLIGHT: - ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); - break; - case TFX_HIGHLIGHT2: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_NONE: - ga = sel.iip ? gaf.srl16(7) : gaf; - break; - } - - if(sel.aa1) - { - GSVector8i x00800080(0x00800080); - - GSVector8i a = sel.edge ? cov : x00800080; - - if(!sel.abe) - { - ga = ga.mix16(a); - } - else - { - ga = ga.blend8(a, ga.eq16(x00800080).srl32(16).sll32(16)); - } - } - } - - // ReadMask - - if(sel.fwrite) - { - fm = m_global.fm; - } - - if(sel.zwrite) - { - zm = m_global.zm; - } - - // TestAlpha - - if(!TestAlpha(test, fm, zm, ga)) continue; - - // ColorTFX - - if(sel.fwrite) - { - GSVector8i af; - - switch(sel.tfx) - { - case TFX_MODULATE: - rb = rb.modulate16<1>(rbf).clamp8(); - break; - case TFX_DECAL: - break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); - rb = rb.modulate16<1>(rbf).add16(af).clamp8(); - ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); - break; - case TFX_NONE: - rb = sel.iip ? rbf.srl16(7) : rbf; - break; - } - } - - // Fog - - if(sel.fwrite && sel.fge) - { - GSVector8i fog = sel.prim != GS_SPRITE_CLASS ? 
f : GSVector8i::broadcast16(&m_local.p.f); - - GSVector8i frb((int)m_global.frb); - GSVector8i fga((int)m_global.fga); - - rb = frb.lerp16<0>(rb, fog); - ga = fga.lerp16<0>(ga, fog).mix16(ga); - - /* - fog = fog.srl16(7); - - GSVector8i ifog = GSVector4i::x00ff().sub16(fog); - - rb = rb.mul16l(fog).add16(frb.mul16l(ifog)).srl16(8); - ga = ga.mul16l(fog).add16(fga.mul16l(ifog)).srl16(8).mix16(ga); - */ - } - - // ReadFrame - - if(sel.fb) - { - fa = fza_base->x + fza_offset->x; - - if(sel.rfb) - { - fd = GSVector8i::load( - (uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16, - (uint8*)m_global.vm + fa * 2 + 32, (uint8*)m_global.vm + fa * 2 + 48); - } - } - - // TestDestAlpha - - if(sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) - { - if(sel.datm) - { - if(sel.fpsm == 2) - { - // test |= fd.srl32(15) == GSVector8i::zero(); - test |= fd.sll32(16).sra32(31) == GSVector8i::zero(); - } - else - { - test |= (~fd).sra32(31); - } - } - else - { - if(sel.fpsm == 2) - { - test |= fd.sll32(16).sra32(31); // == GSVector8i::xffffffff(); - } - else - { - test |= fd.sra32(31); - } - } - - if(test.alltrue()) continue; - } - - // WriteMask - - int fzm = 0; - - if(!sel.notest) - { - if(sel.fwrite) - { - fm |= test; - } - - if(sel.zwrite) - { - zm |= test; - } - - if(sel.fwrite && sel.zwrite) - { - fzm = ~(fm == GSVector8i::xffffffff()).ps32(zm == GSVector8i::xffffffff()).mask(); - } - else if(sel.fwrite) - { - fzm = ~(fm == GSVector8i::xffffffff()).ps32().mask(); - } - else if(sel.zwrite) - { - fzm = ~(zm == GSVector8i::xffffffff()).ps32().mask(); - } - } - - // WriteZBuf - - if(sel.zwrite) - { - if(sel.ztest && sel.zpsm < 2) - { - zs = zs.blend8(zd, zm); - } - - bool fast = sel.ztest ? 
sel.zpsm < 2 : sel.zpsm == 0 && sel.notest; - - if(sel.notest) - { - if(fast) - { - GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>()); - GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>()); - GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>()); - GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>()); - } - else - { - WritePixel(zs, za, 0, sel.zpsm); - WritePixel(zs, za, 1, sel.zpsm); - WritePixel(zs, za, 2, sel.zpsm); - WritePixel(zs, za, 3, sel.zpsm); - WritePixel(zs, za, 4, sel.zpsm); - WritePixel(zs, za, 5, sel.zpsm); - WritePixel(zs, za, 6, sel.zpsm); - WritePixel(zs, za, 7, sel.zpsm); - } - } - else - { - if(fast) - { - if(fzm & 0x00000f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>()); - if(fzm & 0x0000f000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>()); - if(fzm & 0x0f000000) GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>()); - if(fzm & 0xf0000000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>()); - } - else - { - if(fzm & 0x00000300) WritePixel(zs, za, 0, sel.zpsm); - if(fzm & 0x00000c00) WritePixel(zs, za, 1, sel.zpsm); - if(fzm & 0x00003000) WritePixel(zs, za, 2, sel.zpsm); - if(fzm & 0x0000c000) WritePixel(zs, za, 3, sel.zpsm); - if(fzm & 0x03000000) WritePixel(zs, za, 4, sel.zpsm); - if(fzm & 0x0c000000) WritePixel(zs, za, 5, sel.zpsm); - if(fzm & 0x30000000) WritePixel(zs, za, 6, sel.zpsm); - if(fzm & 0xc0000000) WritePixel(zs, za, 7, sel.zpsm); - } - } - } - - // AlphaBlend - - if(sel.fwrite && (sel.abe || sel.aa1)) - { - GSVector8i rbs = rb, gas = ga, rbd, gad, a, mask; - - if(sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) - { - switch(sel.fpsm) - { - case 0: - case 1: - rbd = fd.sll16(8).srl16(8); - gad = fd.srl16(8); - break; - case 2: - rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - break; - } - 
} - - if(sel.aba != sel.abb) - { - switch(sel.aba) - { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector8i::zero(); break; - } - - switch(sel.abb) - { - case 0: rb = rb.sub16(rbs); break; - case 1: rb = rb.sub16(rbd); break; - case 2: break; - } - - if(!(sel.fpsm == 1 && sel.abc == 1)) - { - switch(sel.abc) - { - case 0: a = gas.yywwlh().sll16(7); break; - case 1: a = gad.yywwlh().sll16(7); break; - case 2: a = m_global.afix; break; - } - - rb = rb.modulate16<1>(a); - } - - switch(sel.abd) - { - case 0: rb = rb.add16(rbs); break; - case 1: rb = rb.add16(rbd); break; - case 2: break; - } - } - else - { - switch(sel.abd) - { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector8i::zero(); break; - } - } - - if(sel.pabe) - { - mask = (gas << 8).sra32(31); - - rb = rbs.blend8(rb, mask); - } - - if(sel.aba != sel.abb) - { - switch(sel.aba) - { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector8i::zero(); break; - } - - switch(sel.abb) - { - case 0: ga = ga.sub16(gas); break; - case 1: ga = ga.sub16(gad); break; - case 2: break; - } - - if(!(sel.fpsm == 1 && sel.abc == 1)) - { - ga = ga.modulate16<1>(a); - } - - switch(sel.abd) - { - case 0: ga = ga.add16(gas); break; - case 1: ga = ga.add16(gad); break; - case 2: break; - } - } - else - { - switch(sel.abd) - { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector8i::zero(); break; - } - } - - if(sel.pabe) - { - ga = gas.blend8(ga, mask >> 16); - } - else - { - if(sel.fpsm != 1) - { - ga = ga.mix16(gas); - } - } - } - - // WriteFrame - - if(sel.fwrite) - { - if(sel.fpsm == 2 && sel.dthe) - { - int y = (top & 3) << 1; - - rb = rb.add16(GSVector8i::broadcast128(m_global.dimx[0 + y])); - ga = ga.add16(GSVector8i::broadcast128(m_global.dimx[1 + y])); - } - - if(sel.colclamp == 0) - { - rb &= GSVector8i::x00ff(); - ga &= GSVector8i::x00ff(); - } - - GSVector8i fs = rb.upl16(ga).pu16(rb.uph16(ga)); - - if(sel.fba && sel.fpsm != 1) - { - fs |= 
GSVector8i::x80000000(); - } - - if(sel.fpsm == 2) - { - GSVector8i rb = fs & 0x00f800f8; - GSVector8i ga = fs & 0x8000f800; - - fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); - } - - if(sel.rfb) - { - fs = fs.blend(fd, fm); - } - - bool fast = sel.rfb ? sel.fpsm < 2 : sel.fpsm == 0 && sel.notest; - - if(sel.notest) - { - if(fast) - { - GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>()); - GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>()); - GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>()); - GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>()); - } - else - { - WritePixel(fs, fa, 0, sel.fpsm); - WritePixel(fs, fa, 1, sel.fpsm); - WritePixel(fs, fa, 2, sel.fpsm); - WritePixel(fs, fa, 3, sel.fpsm); - WritePixel(fs, fa, 4, sel.fpsm); - WritePixel(fs, fa, 5, sel.fpsm); - WritePixel(fs, fa, 6, sel.fpsm); - WritePixel(fs, fa, 7, sel.fpsm); - } - } - else - { - if(fast) - { - if(fzm & 0x0000000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>()); - if(fzm & 0x000000f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>()); - if(fzm & 0x000f0000) GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>()); - if(fzm & 0x00f00000) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>()); - } - else - { - if(fzm & 0x00000003) WritePixel(fs, fa, 0, sel.fpsm); - if(fzm & 0x0000000c) WritePixel(fs, fa, 1, sel.fpsm); - if(fzm & 0x00000030) WritePixel(fs, fa, 2, sel.fpsm); - if(fzm & 0x000000c0) WritePixel(fs, fa, 3, sel.fpsm); - if(fzm & 0x00030000) WritePixel(fs, fa, 4, sel.fpsm); - if(fzm & 0x000c0000) WritePixel(fs, fa, 5, sel.fpsm); - if(fzm & 0x00300000) WritePixel(fs, fa, 6, sel.fpsm); - if(fzm & 0x00c00000) WritePixel(fs, fa, 7, sel.fpsm); - } - } - } - } - while(0); - - if(sel.edge) break; - - if(steps <= 0) break; - - // Step - - steps -= 8; - - fza_offset += 2; - - if(sel.prim != GS_SPRITE_CLASS) - { - if(sel.zb) - { - zo += 
GSVector8::broadcast32(&m_local.d8.p.z); - } - - if(sel.fwrite && sel.fge) - { - f = f.add16(GSVector8i::broadcast16(&m_local.d8.p.f)); - } - } - - if(sel.fb) - { - if(sel.tfx != TFX_NONE) - { - if(sel.fst) - { - GSVector8i stq = GSVector8i::cast(GSVector8(m_local.d8.stq)); - - s = GSVector8::cast(GSVector8i::cast(s) + stq.xxxx()); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) - { - t = GSVector8::cast(GSVector8i::cast(t) + stq.yyyy()); - } - } - else - { - GSVector8 stq(m_local.d8.stq); - - s += stq.xxxx(); - t += stq.yyyy(); - q += stq.zzzz(); - } - } - } - - if(!(sel.tfx == TFX_DECAL && sel.tcc)) - { - if(sel.iip) - { - GSVector8i c = GSVector8i::broadcast64(&m_local.d8.c); - - rbf = rbf.add16(c.xxxx()).max_i16(GSVector8i::zero()); - gaf = gaf.add16(c.yyyy()).max_i16(GSVector8i::zero()); - } - } - - if(!sel.notest) - { - test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]); - } - } - - #else - - GSVector4i test; - GSVector4 zo; - GSVector4i f; - GSVector4 s, t, q; - GSVector4i uf, vf; - GSVector4i rbf, gaf; - GSVector4i cov; - - // Init - - int skip, steps; - - if(!sel.notest) - { - skip = left & 3; - steps = pixels + skip - 4; - left -= skip; - test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; - } - else - { - skip = 0; - steps = pixels - 4; - } - - ASSERT((left & 3) == 0); - - const GSVector2i* fza_base = &m_global.fzbr[top]; - const GSVector2i* fza_offset = &m_global.fzbc[left >> 2]; - - if(sel.prim != GS_SPRITE_CLASS) - { - if(sel.fwrite && sel.fge) - { - f = GSVector4i(scan.p).zzzzh().zzzz().add16(m_local.d[skip].f); - } - - if(sel.zb) - { - zo = m_local.d[skip].z; - } - } - - if(sel.fb) - { - if(sel.edge) - { - cov = GSVector4i::cast(scan.t).zzzzh().wwww().srl16(9); - } - - if(sel.tfx != TFX_NONE) - { - if(sel.fst) - { - GSVector4i vt(scan.t); - - GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s); - GSVector4i v = vt.yyyy(); - - 
if(sel.prim != GS_SPRITE_CLASS || sel.mmin) - { - v += GSVector4i::cast(m_local.d[skip].t); - } - else if(sel.ltf) - { - vf = v.xxzzlh().srl16(12); - } - - s = GSVector4::cast(u); - t = GSVector4::cast(v); - } - else - { - s = scan.t.xxxx() + m_local.d[skip].s; - t = scan.t.yyyy() + m_local.d[skip].t; - q = scan.t.zzzz() + m_local.d[skip].q; - } - } - - if(!(sel.tfx == TFX_DECAL && sel.tcc)) - { - if(sel.iip) - { - GSVector4i c(scan.c); - - c = c.upl16(c.zwxy()); - - rbf = c.xxxx().add16(m_local.d[skip].rb); - gaf = c.zzzz().add16(m_local.d[skip].ga); - } - else - { - rbf = m_local.c.rb; - gaf = m_local.c.ga; - } - } - } - - while(1) - { - do - { - int fa = 0, za = 0; - GSVector4i fd, zs, zd; - GSVector4i fm, zm; - GSVector4i rb, ga; - - // TestZ - - if(sel.zb) - { - za = fza_base->y + fza_offset->y; - - if(sel.prim != GS_SPRITE_CLASS) - { - GSVector4 z = scan.p.zzzz() + zo; - - if(sel.zoverflow) - { - zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - } - else - { - zs = GSVector4i(z); - } - } - else - { - zs = m_local.p.z; - } - - if(sel.ztest) - { - zd = GSVector4i::load((uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16); - - switch(sel.zpsm) - { - case 1: zd = zd.sll32(8).srl32(8); break; - case 2: zd = zd.sll32(16).srl32(16); break; - default: break; - } - - GSVector4i zso = zs; - GSVector4i zdo = zd; - - if(sel.zoverflow || sel.zpsm == 0) - { - zso -= GSVector4i::x80000000(); - zdo -= GSVector4i::x80000000(); - } - - switch(sel.ztst) - { - case ZTST_GEQUAL: test |= zso < zdo; break; - case ZTST_GREATER: test |= zso <= zdo; break; - } - - if(test.alltrue()) continue; - } - } - - // SampleTexture - - if(sel.fb && sel.tfx != TFX_NONE) - { - GSVector4i u, v, uv[2]; - GSVector4i lodi, lodf; - GSVector4i minuv, maxuv; - GSVector4i addr00, addr01, addr10, addr11; - GSVector4i c00, c01, c10, c11; - - if(sel.mmin) - { - if(!sel.fst) - { - GSVector4 qrcp = q.rcp(); - - u = GSVector4i(s * qrcp); - v = GSVector4i(t * qrcp); - } 
- else - { - u = GSVector4i::cast(s); - v = GSVector4i::cast(t); - } - - if(!sel.lcm) - { - GSVector4 tmp = q.log2(3) * m_global.l + m_global.k; // (-log2(Q) * (1 << L) + K) * 0x10000 - - GSVector4i lod = GSVector4i(tmp.sat(GSVector4::zero(), m_global.mxl), false); - - if(sel.mmin == 1) // round-off mode - { - lod += 0x8000; - } - - lodi = lod.srl32(16); - - if(sel.mmin == 2) // trilinear mode - { - lodf = lod.xxzzlh(); - } - - // shift u/v by (int)lod - - GSVector4i aabb = u.upl32(v); - GSVector4i ccdd = u.uph32(v); - - GSVector4i aaxx = aabb.sra32(lodi.x); - GSVector4i xxbb = aabb.sra32(lodi.y); - GSVector4i ccxx = ccdd.sra32(lodi.z); - GSVector4i xxdd = ccdd.sra32(lodi.w); - - GSVector4i acac = aaxx.upl32(ccxx); - GSVector4i bdbd = xxbb.uph32(xxdd); - - u = acac.upl32(bdbd); - v = acac.uph32(bdbd); - - uv[0] = u; - uv[1] = v; - - GSVector4i minmax = m_global.t.minmax; - - GSVector4i v0 = minmax.srl16(lodi.x); - GSVector4i v1 = minmax.srl16(lodi.y); - GSVector4i v2 = minmax.srl16(lodi.z); - GSVector4i v3 = minmax.srl16(lodi.w); - - v0 = v0.upl16(v1); - v2 = v2.upl16(v3); - - minuv = v0.upl32(v2); - maxuv = v0.uph32(v2); - } - else - { - lodi = m_global.lod.i; - - u = u.sra32(lodi.x); - v = v.sra32(lodi.x); - - uv[0] = u; - uv[1] = v; - - minuv = m_local.temp.uv_minmax[0]; - maxuv = m_local.temp.uv_minmax[1]; - } - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); - } - - GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector4i uv1 = uv0; - - { - GSVector4i repeat = (uv0 & minuv) | maxuv; - GSVector4i clamp = uv0.sat_i16(minuv, maxuv); - - uv0 = clamp.blend8(repeat, m_global.t.mask); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector4i::x0001()); - - GSVector4i repeat = (uv1 & minuv) | maxuv; - GSVector4i clamp = uv1.sat_i16(minuv, maxuv); - - uv1 = clamp.blend8(repeat, m_global.t.mask); - } - - GSVector4i y0 = uv0.uph16() << (sel.tw + 3); - GSVector4i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector4i 
y1 = uv1.uph16() << (sel.tw + 3); - GSVector4i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - for(int i = 0; i < 4; i++) - { - const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = m_global.clut[tex[addr00.u32[i]]]; - c01.u32[i] = m_global.clut[tex[addr01.u32[i]]]; - c10.u32[i] = m_global.clut[tex[addr10.u32[i]]]; - c11.u32[i] = m_global.clut[tex[addr11.u32[i]]]; - } - } - else - { - for(int i = 0; i < 4; i++) - { - const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = tex[addr00.u32[i]]; - c01.u32[i] = tex[addr01.u32[i]]; - c10.u32[i] = tex[addr10.u32[i]]; - c11.u32[i] = tex[addr11.u32[i]]; - } - } - - GSVector4i rb00 = c00.sll16(8).srl16(8); - GSVector4i ga00 = c00.srl16(8); - GSVector4i rb01 = c01.sll16(8).srl16(8); - GSVector4i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector4i rb10 = c10.sll16(8).srl16(8); - GSVector4i ga10 = c10.srl16(8); - GSVector4i rb11 = c11.sll16(8).srl16(8); - GSVector4i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb = rb00.lerp16_4(rb10, vf); - ga = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - for(int i = 0; i < 4; i++) - { - c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; - } - } - else - { - for(int i = 0; i < 4; i++) - { - c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; - } - } - - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); - } - - if(sel.mmin != 1) // !round-off mode - { - GSVector4i rb2, ga2; - - lodi += GSVector4i::x00000001(); - - u = uv[0].sra32(1); - v = uv[1].sra32(1); - - minuv = minuv.srl16(1); - maxuv = maxuv.srl16(1); - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - - uf = u.xxzzlh().srl16(12); - vf = v.xxzzlh().srl16(12); - } - - GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector4i 
uv1 = uv0; - - { - GSVector4i repeat = (uv0 & minuv) | maxuv; - GSVector4i clamp = uv0.sat_i16(minuv, maxuv); - - uv0 = clamp.blend8(repeat, m_global.t.mask); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector4i::x0001()); - - GSVector4i repeat = (uv1 & minuv) | maxuv; - GSVector4i clamp = uv1.sat_i16(minuv, maxuv); - - uv1 = clamp.blend8(repeat, m_global.t.mask); - } - - GSVector4i y0 = uv0.uph16() << (sel.tw + 3); - GSVector4i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector4i y1 = uv1.uph16() << (sel.tw + 3); - GSVector4i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - for(int i = 0; i < 4; i++) - { - const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = m_global.clut[tex[addr00.u32[i]]]; - c01.u32[i] = m_global.clut[tex[addr01.u32[i]]]; - c10.u32[i] = m_global.clut[tex[addr10.u32[i]]]; - c11.u32[i] = m_global.clut[tex[addr11.u32[i]]]; - } - } - else - { - for(int i = 0; i < 4; i++) - { - const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; - - c00.u32[i] = tex[addr00.u32[i]]; - c01.u32[i] = tex[addr01.u32[i]]; - c10.u32[i] = tex[addr10.u32[i]]; - c11.u32[i] = tex[addr11.u32[i]]; - } - } - - GSVector4i rb00 = c00.sll16(8).srl16(8); - GSVector4i ga00 = c00.srl16(8); - GSVector4i rb01 = c01.sll16(8).srl16(8); - GSVector4i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector4i rb10 = c10.sll16(8).srl16(8); - GSVector4i ga10 = c10.srl16(8); - GSVector4i rb11 = c11.sll16(8).srl16(8); - GSVector4i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb2 = rb00.lerp16_4(rb10, vf); - ga2 = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - for(int i = 0; i < 4; i++) - { - c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; - } - } - else - { - for(int i = 0; i < 4; i++) - { - c00.u32[i] = ((const 
uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; - } - } - - rb2 = c00.sll16(8).srl16(8); - ga2 = c00.srl16(8); - } - - if(sel.lcm) lodf = m_global.lod.f; - - lodf = lodf.srl16(1); - - rb = rb.lerp16<0>(rb2, lodf); - ga = ga.lerp16<0>(ga2, lodf); - } - } - else - { - if(!sel.fst) - { - GSVector4 qrcp = q.rcp(); - - u = GSVector4i(s * qrcp); - v = GSVector4i(t * qrcp); - - if(sel.ltf) - { - u -= 0x8000; - v -= 0x8000; - } - } - else - { - u = GSVector4i::cast(s); - v = GSVector4i::cast(t); - } - - if(sel.ltf) - { - uf = u.xxzzlh().srl16(12); - - if(sel.prim != GS_SPRITE_CLASS) - { - vf = v.xxzzlh().srl16(12); - } - } - - GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - GSVector4i uv1 = uv0; - - { - GSVector4i repeat = (uv0 & m_global.t.min) | m_global.t.max; - GSVector4i clamp = uv0.sat_i16(m_global.t.min, m_global.t.max); - - uv0 = clamp.blend8(repeat, m_global.t.mask); - } - - if(sel.ltf) - { - uv1 = uv1.add16(GSVector4i::x0001()); - - GSVector4i repeat = (uv1 & m_global.t.min) | m_global.t.max; - GSVector4i clamp = uv1.sat_i16(m_global.t.min, m_global.t.max); - - uv1 = clamp.blend8(repeat, m_global.t.mask); - } - - GSVector4i y0 = uv0.uph16() << (sel.tw + 3); - GSVector4i x0 = uv0.upl16(); - - if(sel.ltf) - { - GSVector4i y1 = uv1.uph16() << (sel.tw + 3); - GSVector4i x1 = uv1.upl16(); - - addr00 = y0 + x0; - addr01 = y0 + x1; - addr10 = y1 + x0; - addr11 = y1 + x1; - - if(sel.tlu) - { - const uint8* tex = (const uint8*)m_global.tex[0]; - - c00 = addr00.gather32_32(tex, m_global.clut); - c01 = addr01.gather32_32(tex, m_global.clut); - c10 = addr10.gather32_32(tex, m_global.clut); - c11 = addr11.gather32_32(tex, m_global.clut); - } - else - { - const uint32* tex = (const uint32*)m_global.tex[0]; - - c00 = addr00.gather32_32(tex); - c01 = addr01.gather32_32(tex); - c10 = addr10.gather32_32(tex); - c11 = addr11.gather32_32(tex); - } - - GSVector4i rb00 = c00.sll16(8).srl16(8); - GSVector4i ga00 = c00.srl16(8); - GSVector4i rb01 = c01.sll16(8).srl16(8); - 
GSVector4i ga01 = c01.srl16(8); - - rb00 = rb00.lerp16_4(rb01, uf); - ga00 = ga00.lerp16_4(ga01, uf); - - GSVector4i rb10 = c10.sll16(8).srl16(8); - GSVector4i ga10 = c10.srl16(8); - GSVector4i rb11 = c11.sll16(8).srl16(8); - GSVector4i ga11 = c11.srl16(8); - - rb10 = rb10.lerp16_4(rb11, uf); - ga10 = ga10.lerp16_4(ga11, uf); - - rb = rb00.lerp16_4(rb10, vf); - ga = ga00.lerp16_4(ga10, vf); - } - else - { - addr00 = y0 + x0; - - if(sel.tlu) - { - c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut); - } - else - { - c00 = addr00.gather32_32((const uint32*)m_global.tex[0]); - } - - rb = c00.sll16(8).srl16(8); - ga = c00.srl16(8); - } - } - } - - // AlphaTFX - - if(sel.fb) - { - switch(sel.tfx) - { - case TFX_MODULATE: - ga = ga.modulate16<1>(gaf).clamp8(); - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_DECAL: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_HIGHLIGHT: - ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); - break; - case TFX_HIGHLIGHT2: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_NONE: - ga = sel.iip ? gaf.srl16(7) : gaf; - break; - } - - if(sel.aa1) - { - GSVector4i x00800080(0x00800080); - - GSVector4i a = sel.edge ? cov : x00800080; - - if(!sel.abe) - { - ga = ga.mix16(a); - } - else - { - ga = ga.blend8(a, ga.eq16(x00800080).srl32(16).sll32(16)); - } - } - } - - // ReadMask - - if(sel.fwrite) - { - fm = m_global.fm; - } - - if(sel.zwrite) - { - zm = m_global.zm; - } - - // TestAlpha - - if(!TestAlpha(test, fm, zm, ga)) continue; - - // ColorTFX - - if(sel.fwrite) - { - GSVector4i af; - - switch(sel.tfx) - { - case TFX_MODULATE: - rb = rb.modulate16<1>(rbf).clamp8(); - break; - case TFX_DECAL: - break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); - rb = rb.modulate16<1>(rbf).add16(af).clamp8(); - ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); - break; - case TFX_NONE: - rb = sel.iip ? 
rbf.srl16(7) : rbf; - break; - } - } - - // Fog - - if(sel.fwrite && sel.fge) - { - GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f; - - rb = m_global.frb.lerp16<0>(rb, fog); - ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga); - - /* - fog = fog.srl16(7); - - GSVector4i ifog = GSVector4i::x00ff().sub16(fog); - - rb = rb.mul16l(fog).add16(m_global.frb.mul16l(ifog)).srl16(8); - ga = ga.mul16l(fog).add16(m_global.fga.mul16l(ifog)).srl16(8).mix16(ga); - */ - } - - // ReadFrame - - if(sel.fb) - { - fa = fza_base->x + fza_offset->x; - - if(sel.rfb) - { - fd = GSVector4i::load((uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16); - } - } - - // TestDestAlpha - - if(sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) - { - if(sel.datm) - { - if(sel.fpsm == 2) - { - // test |= fd.srl32(15) == GSVector4i::zero(); - test |= fd.sll32(16).sra32(31) == GSVector4i::zero(); - } - else - { - test |= (~fd).sra32(31); - } - } - else - { - if(sel.fpsm == 2) - { - test |= fd.sll32(16).sra32(31); // == GSVector4i::xffffffff(); - } - else - { - test |= fd.sra32(31); - } - } - - if(test.alltrue()) continue; - } - - // WriteMask - - int fzm = 0; - - if(!sel.notest) - { - if(sel.fwrite) - { - fm |= test; - } - - if(sel.zwrite) - { - zm |= test; - } - - if(sel.fwrite && sel.zwrite) - { - fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); - } - else if(sel.fwrite) - { - fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask(); - } - else if(sel.zwrite) - { - fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask(); - } - } - - // WriteZBuf - - if(sel.zwrite) - { - if(sel.ztest && sel.zpsm < 2) - { - zs = zs.blend8(zd, zm); - } - - bool fast = sel.ztest ? 
sel.zpsm < 2 : sel.zpsm == 0 && sel.notest; - - if(sel.notest) - { - if(fast) - { - GSVector4i::storel((uint8*)m_global.vm + za * 2, zs); - GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs); - } - else - { - WritePixel(zs, za, 0, sel.zpsm); - WritePixel(zs, za, 1, sel.zpsm); - WritePixel(zs, za, 2, sel.zpsm); - WritePixel(zs, za, 3, sel.zpsm); - } - } - else - { - if(fast) - { - if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs); - if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs); - } - else - { - if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm); - if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm); - if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm); - if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm); - } - } - } - - // AlphaBlend - - if(sel.fwrite && (sel.abe || sel.aa1)) - { - GSVector4i rbs = rb, gas = ga, rbd, gad, a, mask; - - if(sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) - { - switch(sel.fpsm) - { - case 0: - case 1: - rbd = fd.sll16(8).srl16(8); - gad = fd.srl16(8); - break; - case 2: - rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - break; - } - } - - if(sel.aba != sel.abb) - { - switch(sel.aba) - { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector4i::zero(); break; - } - - switch(sel.abb) - { - case 0: rb = rb.sub16(rbs); break; - case 1: rb = rb.sub16(rbd); break; - case 2: break; - } - - if(!(sel.fpsm == 1 && sel.abc == 1)) - { - switch(sel.abc) - { - case 0: a = gas.yywwlh().sll16(7); break; - case 1: a = gad.yywwlh().sll16(7); break; - case 2: a = m_global.afix; break; - } - - rb = rb.modulate16<1>(a); - } - - switch(sel.abd) - { - case 0: rb = rb.add16(rbs); break; - case 1: rb = rb.add16(rbd); break; - case 2: break; - } - } - else - { - switch(sel.abd) - { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector4i::zero(); break; - } - } - - if(sel.pabe) - { - mask = (gas 
<< 8).sra32(31); - - rb = rbs.blend8(rb, mask); - } - - if(sel.aba != sel.abb) - { - switch(sel.aba) - { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector4i::zero(); break; - } - - switch(sel.abb) - { - case 0: ga = ga.sub16(gas); break; - case 1: ga = ga.sub16(gad); break; - case 2: break; - } - - if(!(sel.fpsm == 1 && sel.abc == 1)) - { - ga = ga.modulate16<1>(a); - } - - switch(sel.abd) - { - case 0: ga = ga.add16(gas); break; - case 1: ga = ga.add16(gad); break; - case 2: break; - } - } - else - { - switch(sel.abd) - { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector4i::zero(); break; - } - } - - if(sel.pabe) - { - ga = gas.blend8(ga, mask >> 16); - } - else - { - if(sel.fpsm != 1) - { - ga = ga.mix16(gas); - } - } - } - - // WriteFrame - - if(sel.fwrite) - { - if(sel.fpsm == 2 && sel.dthe) - { - int y = (top & 3) << 1; - - rb = rb.add16(m_global.dimx[0 + y]); - ga = ga.add16(m_global.dimx[1 + y]); - } - - if(sel.colclamp == 0) - { - rb &= GSVector4i::x00ff(); - ga &= GSVector4i::x00ff(); - } - - GSVector4i fs = rb.upl16(ga).pu16(rb.uph16(ga)); - - if(sel.fba && sel.fpsm != 1) - { - fs |= GSVector4i::x80000000(); - } - - if(sel.fpsm == 2) - { - GSVector4i rb = fs & 0x00f800f8; - GSVector4i ga = fs & 0x8000f800; - - fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); - } - - if(sel.rfb) - { - fs = fs.blend(fd, fm); - } - - bool fast = sel.rfb ? 
sel.fpsm < 2 : sel.fpsm == 0 && sel.notest; - - if(sel.notest) - { - if(fast) - { - GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs); - GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs); - } - else - { - WritePixel(fs, fa, 0, sel.fpsm); - WritePixel(fs, fa, 1, sel.fpsm); - WritePixel(fs, fa, 2, sel.fpsm); - WritePixel(fs, fa, 3, sel.fpsm); - } - } - else - { - if(fast) - { - if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs); - if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs); - } - else - { - if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm); - if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm); - if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm); - if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm); - } - } - } - } - while(0); - - if(sel.edge) break; - - if(steps <= 0) break; - - // Step - - steps -= 4; - - fza_offset++; - - if(sel.prim != GS_SPRITE_CLASS) - { - if(sel.zb) - { - zo += m_local.d4.z; - } - - if(sel.fwrite && sel.fge) - { - f = f.add16(m_local.d4.f); - } - } - - if(sel.fb) - { - if(sel.tfx != TFX_NONE) - { - if(sel.fst) - { - GSVector4i stq = GSVector4i::cast(m_local.d4.stq); - - s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx()); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) - { - t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy()); - } - } - else - { - GSVector4 stq = m_local.d4.stq; - - s += stq.xxxx(); - t += stq.yyyy(); - q += stq.zzzz(); - } - } - } - - if(!(sel.tfx == TFX_DECAL && sel.tcc)) - { - if(sel.iip) - { - GSVector4i c = m_local.d4.c; - - rbf = rbf.add16(c.xxxx()).max_i16(GSVector4i::zero()); - gaf = gaf.add16(c.yyyy()).max_i16(GSVector4i::zero()); - } - } - - if(!sel.notest) - { - test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; - } - } - - #endif -} - -void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) -{ - uint32 zwrite = m_global.sel.zwrite; - uint32 edge = m_global.sel.edge; - - m_global.sel.zwrite = 0; - 
m_global.sel.edge = 1; - - DrawScanline(pixels, left, top, scan); - - m_global.sel.zwrite = zwrite; - m_global.sel.edge = edge; -} - -template -bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga) -{ - GSScanlineSelector sel = m_global.sel; - - switch(sel.afail) - { - case AFAIL_FB_ONLY: - if(!sel.zwrite) return true; - break; - - case AFAIL_ZB_ONLY: - if(!sel.fwrite) return true; - break; - - case AFAIL_RGB_ONLY: - if(!sel.zwrite && sel.fpsm == 1) return true; - break; - } - - T t; - - switch(sel.atst) - { - case ATST_NEVER: - t = GSVector4i::xffffffff(); - break; - - case ATST_ALWAYS: - return true; - - case ATST_LESS: - case ATST_LEQUAL: - t = (ga >> 16) > T(m_global.aref); - break; - - case ATST_EQUAL: - t = (ga >> 16) != T(m_global.aref); - break; - - case ATST_GEQUAL: - case ATST_GREATER: - t = (ga >> 16) < T(m_global.aref); - break; - - case ATST_NOTEQUAL: - t = (ga >> 16) == T(m_global.aref); - break; - - default: - __assume(0); - } - - switch(sel.afail) - { - case AFAIL_KEEP: - test |= t; - if(test.alltrue()) return false; - break; - - case AFAIL_FB_ONLY: - zm |= t; - break; - - case AFAIL_ZB_ONLY: - fm |= t; - break; - - case AFAIL_RGB_ONLY: - zm |= t; - fm |= t & T::xff000000(); // fpsm 16 bit => & 0xffff8000? 
- break; - - default: - __assume(0); - } - - return true; -} - -static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0] - -template void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm) -{ - uint8* dst = (uint8*)m_global.vm + addr * 2 + s_offsets[i] * 2; - - switch(psm) - { - case 0: - *(uint32*)dst = src.u32[i]; - break; - case 1: - *(uint32*)dst = (src.u32[i] & 0xffffff) | (*(uint32*)dst & 0xff000000); - break; - case 2: - *(uint16*)dst = src.u16[i * 2]; - break; - } -} - -#endif - -void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) -{ - ASSERT(r.y >= 0); - ASSERT(r.w >= 0); - - // FIXME: sometimes the frame and z buffer may overlap, the outcome is undefined - - uint32 m; - - #if _M_SSE >= 0x501 - m = m_global.zm; - #else - m = m_global.zm.u32[0]; - #endif - - if(m != 0xffffffff) - { - const int* zbr = m_global.zbr; - const int* zbc = m_global.zbc; - - uint32 z = v.t.u32[3]; // (uint32)v.p.z; - - if(m_global.sel.zpsm != 2) - { - if(m == 0) - { - DrawRectT(zbr, zbc, r, z, m); - } - else - { - DrawRectT(zbr, zbc, r, z, m); - } - } - else - { - if((m & 0xffff) == 0) - { - DrawRectT(zbr, zbc, r, z, m); - } - else - { - DrawRectT(zbr, zbc, r, z, m); - } - } - } - - #if _M_SSE >= 0x501 - m = m_global.fm; - #else - m = m_global.fm.u32[0]; - #endif - - if(m != 0xffffffff) - { - const int* fbr = m_global.fbr; - const int* fbc = m_global.fbc; - - uint32 c = (GSVector4i(v.c) >> 7).rgba32(); - - if(m_global.sel.fba) - { - c |= 0x80000000; - } - - if(m_global.sel.fpsm != 2) - { - if(m == 0) - { - DrawRectT(fbr, fbc, r, c, m); - } - else - { - DrawRectT(fbr, fbc, r, c, m); - } - } - else - { - c = ((c & 0xf8) >> 3) | ((c & 0xf800) >> 6) | ((c & 0xf80000) >> 9) | ((c & 0x80000000) >> 16); - - if((m & 0xffff) == 0) - { - DrawRectT(fbr, fbc, r, c, m); - } - else - { - DrawRectT(fbr, fbc, r, c, m); - } - } - } -} - -template -void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const 
GSVector4i& r, uint32 c, uint32 m) -{ - if(m == 0xffffffff) return; - - #if _M_SSE >= 0x501 - - GSVector8i color((int)c); - GSVector8i mask((int)m); - - #else - - GSVector4i color((int)c); - GSVector4i mask((int)m); - - #endif - - if(sizeof(T) == sizeof(uint16)) - { - color = color.xxzzlh(); - mask = mask.xxzzlh(); - c = (c & 0xffff) | (c << 16); - m = (m & 0xffff) | (m << 16); - } - - color = color.andnot(mask); - c = c & (~m); - - if(masked) ASSERT(mask.u32[0] != 0); - - GSVector4i br = r.ralign(GSVector2i(8 * 4 / sizeof(T), 8)); - - if(!br.rempty()) - { - FillRect(row, col, GSVector4i(r.x, r.y, r.z, br.y), c, m); - FillRect(row, col, GSVector4i(r.x, br.w, r.z, r.w), c, m); - - if(r.x < br.x || br.z < r.z) - { - FillRect(row, col, GSVector4i(r.x, br.y, br.x, br.w), c, m); - FillRect(row, col, GSVector4i(br.z, br.y, r.z, br.w), c, m); - } - - FillBlock(row, col, br, color, mask); - } - else - { - FillRect(row, col, r, c, m); - } -} - -template -void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) -{ - if(r.x >= r.z) return; - - T* vm = (T*)m_global.vm; - - for(int y = r.y; y < r.w; y++) - { - T* RESTRICT d = &vm[row[y]]; - - for(int x = r.x; x < r.z; x++) - { - d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m))); - } - } -} - -#if _M_SSE >= 0x501 - -template -void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m) -{ - if(r.x >= r.z) return; - - T* vm = (T*)m_global.vm; - - for(int y = r.y; y < r.w; y += 8) - { - T* RESTRICT d = &vm[row[y]]; - - for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) - { - GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]]; - - p[0] = !masked ? c : (c | (p[0] & m)); - p[1] = !masked ? c : (c | (p[1] & m)); - p[2] = !masked ? c : (c | (p[2] & m)); - p[3] = !masked ? c : (c | (p[3] & m)); - p[4] = !masked ? c : (c | (p[4] & m)); - p[5] = !masked ? 
c : (c | (p[5] & m)); - p[6] = !masked ? c : (c | (p[6] & m)); - p[7] = !masked ? c : (c | (p[7] & m)); - } - } -} - -#else - -template -void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) -{ - if(r.x >= r.z) return; - - T* vm = (T*)m_global.vm; - - for(int y = r.y; y < r.w; y += 8) - { - T* RESTRICT d = &vm[row[y]]; - - for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) - { - GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]]; - - for(int i = 0; i < 16; i += 4) - { - p[i + 0] = !masked ? c : (c | (p[i + 0] & m)); - p[i + 1] = !masked ? c : (c | (p[i + 1] & m)); - p[i + 2] = !masked ? c : (c | (p[i + 2] & m)); - p[i + 3] = !masked ? c : (c | (p[i + 3] & m)); - } - } - } -} - -#endif diff --git a/plugins/GSdx_legacy/GSDrawScanline.h b/plugins/GSdx_legacy/GSDrawScanline.h deleted file mode 100644 index f1acc6a0b9..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanline.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSState.h" -#include "GSRasterizer.h" -#include "GSScanlineEnvironment.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSDrawScanlineCodeGenerator.h" - -class GSDrawScanline : public IDrawScanline -{ -public: - class SharedData : public GSRasterizerData - { - public: - GSScanlineGlobalData global; - }; - -protected: - GSScanlineGlobalData m_global; - GSScanlineLocalData m_local; - - GSCodeGeneratorFunctionMap m_sp_map; - GSCodeGeneratorFunctionMap m_ds_map; - - template - void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); - - template - __forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); - - #if _M_SSE >= 0x501 - - template - __forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m); - - #else - - template - __forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); - - #endif - -public: - GSDrawScanline(); - virtual ~GSDrawScanline(); - - // IDrawScanline - - void BeginDraw(const GSRasterizerData* data); - void EndDraw(uint64 frame, uint64 ticks, int actual, int total); - - void DrawRect(const GSVector4i& r, const GSVertexSW& v); - -#ifndef ENABLE_JIT_RASTERIZER - - void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan); - void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); - void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); - - bool IsEdge() const {return m_global.sel.aa1;} - bool IsRect() const {return m_global.sel.IsSolidRect();} - - template bool TestAlpha(T& test, T& fm, T& zm, const T& ga); - template void WritePixel(const T& src, int addr, int i, uint32 psm); - -#endif - - void PrintStats() {m_ds_map.PrintStats();} 
-}; diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.cpp deleted file mode 100644 index 3909a6e787..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" - -#if _M_SSE >= 0x501 - -__aligned(const uint8, 8) GSDrawScanlineCodeGenerator::m_test[16][8] = -{ - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, -}; - -const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] = -{ - GSVector8(0.204446009836232697516f), - GSVector8(-1.04913055217340124191f), - GSVector8(2.28330284476918490682f), - GSVector8(1.0f), -}; - -#else - -const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] = -{ - GSVector4i::zero(), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), - GSVector4i::zero(), -}; - -const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] = -{ - GSVector4(0.204446009836232697516f), - GSVector4(-1.04913055217340124191f), - GSVector4(2.28330284476918490682f), - GSVector4(1.0f), -}; - -#endif - 
-GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) - , m_local(*(GSScanlineLocalData*)param) -{ - m_sel.key = key; - - Generate(); -} - -#if _M_SSE >= 0x501 - -void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift) -{ - if(shift == 0) - { - vpmulhrsw(a, f); - } - else - { - vpsllw(a, (uint8)(shift + 1)); - vpmulhw(a, f); - } -} - -void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift) -{ - vpsubw(a, b); - modulate16(a, f, shift); - vpaddw(a, b); -} - -void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f) -{ - vpsubw(a, b); - vpmullw(a, f); - vpsraw(a, 4); - vpaddw(a, b); -} - -void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp) -{ - vpblendw(a, b, 0xaa); -} - -void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp) -{ - vpackuswb(a, a); - vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks - vpmovzxbw(a, a); -} - -void GSDrawScanlineCodeGenerator::alltrue() -{ - vpmovmskb(eax, ymm7); - cmp(eax, 0xffffffff); - je("step", T_NEAR); -} - -void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask) -{ - vpand(b, mask); - vpandn(mask, a); - vpor(a, b, mask); -} - -void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask) -{ - vpand(b, mask); - vpandn(mask, a); - vpor(b, mask); -} - -void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b) -{ - vpblendvb(a, a, b, xmm0); -} - -void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a) -{ - vpblendvb(b, a, b, xmm0); -} - -#else - -void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift) -{ - #if _M_SSE >= 0x500 - - if(shift == 0) - { - vpmulhrsw(a, f); - } - else - { - vpsllw(a, shift + 1); - vpmulhw(a, f); - } - - #else - - if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3)) - { 
- pmulhrsw(a, f); - } - else - { - psllw(a, shift + 1); - pmulhw(a, f); - } - - #endif -} - -void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift) -{ - #if _M_SSE >= 0x500 - - vpsubw(a, b); - modulate16(a, f, shift); - vpaddw(a, b); - - #else - - psubw(a, b); - modulate16(a, f, shift); - paddw(a, b); - - #endif -} - -void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f) -{ - #if _M_SSE >= 0x500 - - vpsubw(a, b); - vpmullw(a, f); - vpsraw(a, 4); - vpaddw(a, b); - - #else - - psubw(a, b); - pmullw(a, f); - psraw(a, 4); - paddw(a, b); - - #endif -} - -void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp) -{ - #if _M_SSE >= 0x500 - - vpblendw(a, b, 0xaa); - - #elif _M_SSE >= 0x401 - - pblendw(a, b, 0xaa); - - #else - - pcmpeqd(temp, temp); - psrld(temp, 16); - pand(a, temp); - pandn(temp, b); - por(a, temp); - - #endif -} - -void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp) -{ - #if _M_SSE >= 0x500 - - vpackuswb(a, a); - vpmovzxbw(a, a); - - #elif _M_SSE >= 0x401 - - packuswb(a, a); - pmovzxbw(a, a); - - #else - - packuswb(a, a); - pxor(temp, temp); - punpcklbw(a, temp); - - #endif -} - -void GSDrawScanlineCodeGenerator::alltrue() -{ - #if _M_SSE >= 0x500 - - vpmovmskb(eax, xmm7); - cmp(eax, 0xffff); - je("step", T_NEAR); - - #else - - pmovmskb(eax, xmm7); - cmp(eax, 0xffff); - je("step", T_NEAR); - - #endif -} - -void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask) -{ - #if _M_SSE >= 0x500 - - vpand(b, mask); - vpandn(mask, a); - vpor(a, b, mask); - - #else - - pand(b, mask); - pandn(mask, a); - por(b, mask); - movdqa(a, b); - - #endif -} - -void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask) -{ - #if _M_SSE >= 0x500 - - vpand(b, mask); - vpandn(mask, a); - vpor(b, mask); - - #else - - pand(b, mask); - pandn(mask, a); - por(b, mask); - - #endif -} - -void 
GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b) -{ - #if _M_SSE >= 0x500 - - vpblendvb(a, a, b, xmm0); - - #elif _M_SSE >= 0x401 - - pblendvb(a, b); - - #else - - blend(a, b, xmm0); - - #endif -} - -void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a) -{ - #if _M_SSE >= 0x500 - - vpblendvb(b, a, b, xmm0); - - #elif _M_SSE >= 0x401 - - pblendvb(a, b); - movdqa(b, a); - - #else - - blendr(b, a, xmm0); - - #endif -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.h b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.h deleted file mode 100644 index 282285bcbd..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSScanlineEnvironment.h" -#include "GSFunctionMap.h" - -using namespace Xbyak; - -class GSDrawScanlineCodeGenerator : public GSCodeGenerator -{ - void operator = (const GSDrawScanlineCodeGenerator&); - - GSScanlineSelector m_sel; - GSScanlineLocalData& m_local; - - void Generate(); - - #if _M_SSE >= 0x501 - - void Init(); - void Step(); - void TestZ(const Ymm& temp1, const Ymm& temp2); - void SampleTexture(); - void Wrap(const Ymm& uv0); - void Wrap(const Ymm& uv0, const Ymm& uv1); - void SampleTextureLOD(); - void WrapLOD(const Ymm& uv0); - void WrapLOD(const Ymm& uv0, const Ymm& uv1); - void AlphaTFX(); - void ReadMask(); - void TestAlpha(); - void ColorTFX(); - void Fog(); - void ReadFrame(); - void TestDestAlpha(); - void WriteMask(); - void WriteZBuf(); - void AlphaBlend(); - void WriteFrame(); - - #if defined(_M_AMD64) || defined(_WIN64) - void ReadPixel(const Ymm& dst, const Ymm& temp, const Reg64& addr); - void WritePixel(const Ymm& src, const Ymm& temp, const Reg64& addr, const Reg32& mask, bool fast, int psm, int fz); - void WritePixel(const Xmm& src, const Reg64& addr, uint8 i, uint8 j, int psm); - #else - void ReadPixel(const Ymm& dst, const Ymm& temp, const Reg32& addr); - void WritePixel(const Ymm& src, const Ymm& temp, const Reg32& addr, const Reg32& mask, bool fast, int psm, int fz); - void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, uint8 j, int psm); - #endif - - void ReadTexel(int pixels, int mip_offset = 0); - void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i); - - void modulate16(const Ymm& a, const Operand& f, int shift); - void lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift); - void lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f); - void mix16(const Ymm& a, const Ymm& b, const Ymm& temp); - void clamp16(const Ymm& a, const Ymm& temp); - void alltrue(); - void blend(const Ymm& a, const Ymm& b, const Ymm& mask); - void blendr(const 
Ymm& b, const Ymm& a, const Ymm& mask); - void blend8(const Ymm& a, const Ymm& b); - void blend8r(const Ymm& b, const Ymm& a); - - #else - - void Init(); - void Step(); - void TestZ(const Xmm& temp1, const Xmm& temp2); - void SampleTexture(); - void Wrap(const Xmm& uv0); - void Wrap(const Xmm& uv0, const Xmm& uv1); - void SampleTextureLOD(); - void WrapLOD(const Xmm& uv0); - void WrapLOD(const Xmm& uv0, const Xmm& uv1); - void AlphaTFX(); - void ReadMask(); - void TestAlpha(); - void ColorTFX(); - void Fog(); - void ReadFrame(); - void TestDestAlpha(); - void WriteMask(); - void WriteZBuf(); - void AlphaBlend(); - void WriteFrame(); - - #if defined(_M_AMD64) || defined(_WIN64) - void ReadPixel(const Xmm& dst, const Reg64& addr); - void WritePixel(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz); - void WritePixel(const Xmm& src, const Reg64& addr, uint8 i, int psm); - #else - void ReadPixel(const Xmm& dst, const Reg32& addr); - void WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz); - void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm); - #endif - - void ReadTexel(int pixels, int mip_offset = 0); - void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i); - - void modulate16(const Xmm& a, const Operand& f, int shift); - void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift); - void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f); - void mix16(const Xmm& a, const Xmm& b, const Xmm& temp); - void clamp16(const Xmm& a, const Xmm& temp); - void alltrue(); - void blend(const Xmm& a, const Xmm& b, const Xmm& mask); - void blendr(const Xmm& b, const Xmm& a, const Xmm& mask); - void blend8(const Xmm& a, const Xmm& b); - void blend8r(const Xmm& b, const Xmm& a); - - #endif - -public: - GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); - - #if _M_SSE >= 0x501 - static __aligned(const uint8, 8) m_test[16][8]; - static const GSVector8 
m_log2_coef[4]; - #else - static const GSVector4i m_test[8]; - static const GSVector4 m_log2_coef[4]; - #endif - -}; diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.avx.cpp deleted file mode 100644 index a4f3a9bc4b..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.avx.cpp +++ /dev/null @@ -1,1828 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE == 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -#error TODO - -void GSDrawScanlineCodeGenerator::Generate() -{ - // TODO: on linux/mac rsi, rdi, xmm6-xmm15 are all caller saved - - push(rbx); - push(rsi); - push(rdi); - push(rbp); - push(r12); - push(r13); - - sub(rsp, 8 + 10 * 16); - - for(int i = 6; i < 16; i++) - { - vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i)); - } - - mov(r10, (size_t)&m_test[0]); - mov(r11, (size_t)&m_local); - mov(r12, (size_t)m_local.gd); - mov(r13, (size_t)m_local.gd->vm); - - Init(); - - // rcx = steps - // rsi = fza_base - // rdi = fza_offset - // r10 = &m_test[0] - // r11 = &m_local - // r12 = m_local->gd - // r13 = m_local->gd.vm - // xmm7 = vf (sprite && ltf) - // xmm8 = z - // xmm9 = f - // xmm10 = s - // xmm11 = t - // xmm12 = q - // xmm13 = rb - // xmm14 = ga - // xmm15 = test - - if(!m_sel.edge) - { - align(16); - } - -L("loop"); - - TestZ(xmm5, xmm6); - - // ebp = za - - if(m_sel.mmin) - { - SampleTextureLOD(); - } - else - { - SampleTexture(); - } - - // ebp = za - // xmm2 = rb - // xmm3 = ga - - AlphaTFX(); - - // ebp = za - // xmm2 = rb - // xmm3 = ga - - ReadMask(); - - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - - TestAlpha(); - - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - - ColorTFX(); - - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - - Fog(); - - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - - ReadFrame(); - - // ebx = fa - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - // xmm6 = fd - - TestDestAlpha(); - - // ebx = fa - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - // xmm6 = fd - - WriteMask(); - - // ebx = fa - // edx = fzm - // ebp = za - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm5 = zm - 
// xmm6 = fd - - WriteZBuf(); - - // ebx = fa - // edx = fzm - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm6 = fd - - AlphaBlend(); - - // ebx = fa - // edx = fzm - // xmm2 = rb - // xmm3 = ga - // xmm4 = fm - // xmm6 = fd - - WriteFrame(); - -L("step"); - - // if(steps <= 0) break; - - if(!m_sel.edge) - { - test(rcx, rcx); - - jle("exit", T_NEAR); - - Step(); - - jmp("loop", T_NEAR); - } - -L("exit"); - - for(int i = 6; i < 16; i++) - { - vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]); - } - - add(rsp, 8 + 10 * 16); - - pop(r13); - pop(r12); - pop(rbp); - pop(rdi); - pop(rsi); - pop(rbx); - - ret(); -} - -void GSDrawScanlineCodeGenerator::Init() -{ - // int skip = left & 3; - - mov(rbx, rdx); - and(rdx, 3); - - // left -= skip; - - sub(rbx, rdx); - - // int steps = pixels + skip - 4; - - lea(rcx, ptr[rcx + rdx - 4]); - - // GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))]; - - shl(rdx, 4); - - vmovdqa(xmm15, ptr[rdx + r10]); - - mov(rax, rcx); - sar(rax, 63); - and(rax, rcx); - shl(rax, 4); - - vpor(xmm15, ptr[rax + r10 + 7 * 16]); - - // GSVector2i* fza_base = &m_local.gd->fzbr[top]; - - mov(rax, (size_t)m_local.gd->fzbr); - lea(rsi, ptr[rax + r8 * 8]); - - // GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2]; - - mov(rax, (size_t)m_local.gd->fzbc); - lea(rdi, ptr[rax + rbx * 2]); - - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) - { - // edx = &m_local.d[skip] - - lea(rdx, ptr[rdx * 8 + r11 + offsetof(GSScanlineLocalData, d)]); - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) - { - vmovaps(xmm0, ptr[r9 + offsetof(GSVertexSW, p)]); // v.p - - if(m_sel.fwrite && m_sel.fge) - { - // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); - - vcvttps2dq(xmm9, xmm0); - vpshufhw(xmm9, xmm9, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm9, xmm9, _MM_SHUFFLE(2, 2, 2, 2)); - vpaddw(xmm9, ptr[rdx + 16 * 6]); - } - 
- if(m_sel.zb) - { - // z = vp.zzzz() + m_local.d[skip].z; - - vshufps(xmm8, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - vaddps(xmm8, ptr[rdx]); - } - } - } - else - { - if(m_sel.ztest) - { - vmovdqa(xmm8, ptr[r11 + offsetof(GSScanlineLocalData, p.z)]); - } - } - - if(m_sel.fb) - { - if(m_sel.edge || m_sel.tfx != TFX_NONE) - { - vmovaps(xmm0, ptr[r9 + offsetof(GSVertexSW, t)]); // v.t - } - - if(m_sel.edge) - { - vpshufhw(xmm1, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - vpsrlw(xmm1, 9); - - vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.cov)], xmm1); - } - - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i vti(vt); - - vcvttps2dq(xmm0, xmm0); - - // s = vti.xxxx() + m_local.d[skip].s; - // t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t; - - vpshufd(xmm10, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm11, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]); - } - else - { - if(m_sel.ltf) - { - vpshuflw(xmm6, xmm11, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm6, 1); - } - } - } - else - { - // s = vt.xxxx() + m_local.d[skip].s; - // t = vt.yyyy() + m_local.d[skip].t; - // q = vt.zzzz() + m_local.d[skip].q; - - vshufps(xmm10, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm11, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vaddps(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]); - vaddps(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]); - vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i vc = GSVector4i(v.c); - - vcvttps2dq(xmm0, ptr[r9 + offsetof(GSVertexSW, c)]); // v.c - - // vc = vc.upl16(vc.zwxy()); - - 
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm0, xmm1); - - // rb = vc.xxxx().add16(m_local.d[skip].rb); - // ga = vc.zzzz().add16(m_local.d[skip].ga); - - vpshufd(xmm13, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm14, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vpaddw(xmm13, ptr[rdx + offsetof(GSScanlineLocalData::skip, rb)]); - vpaddw(xmm14, ptr[rdx + offsetof(GSScanlineLocalData::skip, ga)]); - } - else - { - vmovdqa(xmm13, ptr[r11 + offsetof(GSScanlineLocalData, c.rb)]); - vmovdqa(xmm14, ptr[r11 + offsetof(GSScanlineLocalData, c.ga)]); - } - } - } -} - -void GSDrawScanlineCodeGenerator::Step() -{ - // steps -= 4; - - sub(rcx, 4); - - // fza_offset++; - - add(rdi, 8); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // z += m_local.d4.z; - - if(m_sel.zb) - { - vaddps(xmm8, ptr[r11 + offsetof(GSScanlineLocalData, d4.z)]); - } - - // f = f.add16(m_local.d4.f); - - if(m_sel.fwrite && m_sel.fge) - { - vpaddw(xmm9, ptr[r11 + offsetof(GSScanlineLocalData, d4.f)]); - } - } - else - { - if(m_sel.ztest) - { - } - } - - if(m_sel.fb) - { - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i st = m_local.d4.st; - - // si += st.xxxx(); - // if(!sprite) ti += st.yyyy(); - - vmovdqa(xmm0, ptr[r11 + offsetof(GSScanlineLocalData, d4.stq)]); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpaddd(xmm10, xmm1); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vpaddd(xmm11, xmm1); - } - } - else - { - // GSVector4 stq = m_local.d4.stq; - - // s += stq.xxxx(); - // t += stq.yyyy(); - // q += stq.zzzz(); - - vmovaps(xmm0, ptr[r11 + offsetof(GSScanlineLocalData, d4.stq)]); - - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vaddps(xmm10, xmm1); - vaddps(xmm11, xmm2); - vaddps(xmm12, xmm3); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i c = 
m_local.d4.c; - - // rb = rb.add16(c.xxxx()); - // ga = ga.add16(c.yyyy()); - - vmovdqa(xmm0, ptr[r11 + offsetof(GSScanlineLocalData, d4.c)]); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm2, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddw(xmm13, xmm1); - vpaddw(xmm14, xmm2); - - // FIXME: color may underflow and roll over at the end of the line, if decreasing - - vpxor(xmm0, xmm0); - vpmaxsw(xmm13, xmm0); - vpmaxsw(xmm14, xmm0); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - } - } - } - } - - // test = m_test[7 + (steps & (steps >> 31))]; - - mov(rdx, rcx); - sar(rdx, 63); - and(rdx, rcx); - shl(rdx, 4); - - vmovdqa(xmm15, ptr[rdx + r10 + 7 * 16]); -} - -void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) -{ - if(!m_sel.zb) - { - return; - } - - // int za = fza_base.y + fza_offset->y; - - movsxd(rbp, dword[rsi + 4]); - movsxd(rax, dword[rdi + 4]); - add(rbp, rax); - - // GSVector4i zs = zi; - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.zoverflow) - { - // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - - mov(rax, (size_t)&GSVector4::m_half); - - vbroadcastss(xmm0, ptr[rax]); - vmulps(xmm0, xmm8); - vcvttps2dq(xmm0, xmm0); - vpslld(xmm0, 1); - - vcvttps2dq(xmm1, xmm8); - vpcmpeqd(xmm2, xmm2); - vpsrld(xmm2, 31); - vpand(xmm1, xmm2); - - vpor(xmm0, xmm1); - } - else - { - // zs = GSVector4i(z); - - vcvttps2dq(xmm0, xmm8); - } - - if(m_sel.zwrite) - { - vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zs)], xmm0); - } - } - - if(m_sel.ztest) - { - ReadPixel(xmm1, rbp); - - if(m_sel.zwrite && m_sel.zpsm < 2) - { - vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zd)], xmm1); - } - - // zd &= 0xffffffff >> m_sel.zpsm * 8; - - if(m_sel.zpsm) - { - vpslld(xmm1, m_sel.zpsm * 8); - vpsrld(xmm1, m_sel.zpsm * 8); - } - - if(m_sel.zoverflow || m_sel.zpsm == 0) - { - // GSVector4i o = GSVector4i::x80000000(); - - vpcmpeqd(xmm2, xmm2); - vpslld(xmm2, 31); - - // GSVector4i zso = zs - o; - // 
GSVector4i zdo = zd - o; - - vpsubd(xmm0, xmm2); - vpsubd(xmm1, xmm2); - } - - switch(m_sel.ztst) - { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(xmm1, xmm0); - vpor(xmm15, xmm1); - break; - - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(xmm0, xmm1); - vpcmpeqd(xmm2, xmm2); - vpxor(xmm0, xmm2); - vpor(xmm15, xmm0); - break; - } - - alltrue(); - } -} - -void GSDrawScanlineCodeGenerator::SampleTexture() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - mov(rbx, ptr[r12 + offsetof(GSScanlineGlobalData, tex)]); - - // ebx = tex - - if(!m_sel.fst) - { - vrcpps(xmm0, xmm12); - - vmulps(xmm4, xmm10, xmm0); - vmulps(xmm5, xmm11, xmm0); - - vcvttps2dq(xmm4, xmm4); - vcvttps2dq(xmm5, xmm5); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm0, eax); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - - vpsubd(xmm4, xmm0); - vpsubd(xmm5, xmm0); - } - } - else - { - vmovdqa(xmm4, xmm10); - vmovdqa(xmm5, xmm11); - } - - if(m_sel.ltf) - { - // GSVector4i uf = u.xxzzlh().srl16(1); - - vpshuflw(xmm6, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm6, 1); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4i vf = v.xxzzlh().srl16(1); - - vpshuflw(xmm7, xmm5, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm7, xmm7, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm7, 1); - } - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(xmm4, 16); - vpsrad(xmm5, 16); - vpackssdw(xmm4, xmm5); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - vpcmpeqd(xmm0, xmm0); - vpsrlw(xmm0, 15); - vpaddw(xmm5, xmm4, xmm0); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - Wrap(xmm4, xmm5); - } - else - { - // uv0 = Wrap(uv0); - - Wrap(xmm4); - } - - // xmm4 = uv0 - // xmm5 = uv1 (ltf) - // xmm6 = uf - // xmm7 = vf - - // GSVector4i x0 = 
uv0.upl16(); - // GSVector4i y0 = uv0.uph16() << tw; - - vpxor(xmm0, xmm0); - - vpunpcklwd(xmm2, xmm4, xmm0); - vpunpckhwd(xmm3, xmm4, xmm0); - vpslld(xmm3, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = x0 - // xmm3 = y0 - // xmm5 = uv1 (ltf) - // xmm6 = uf - // xmm7 = vf - - if(m_sel.ltf) - { - // GSVector4i x1 = uv1.upl16(); - // GSVector4i y1 = uv1.uph16() << tw; - - vpunpcklwd(xmm4, xmm5, xmm0); - vpunpckhwd(xmm5, xmm5, xmm0); - vpslld(xmm5, m_sel.tw + 3); - - // xmm2 = x0 - // xmm3 = y0 - // xmm4 = x1 - // xmm5 = y1 - // xmm6 = uf - // xmm7 = vf - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - vpaddd(xmm0, xmm3, xmm2); - vpaddd(xmm1, xmm3, xmm4); - vpaddd(xmm2, xmm5, xmm2); - vpaddd(xmm3, xmm5, xmm4); - - // xmm0 = addr00 - // xmm1 = addr01 - // xmm2 = addr10 - // xmm3 = addr11 - // xmm6 = uf - // xmm7 = vf - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // xmm0 = c00 - // xmm1 = c01 - // xmm2 = c10 - // xmm3 = c11 - // xmm6 = uf - // xmm7 = vf - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - vpsllw(xmm4, xmm0, 8); - vpsrlw(xmm4, 8); - vpsrlw(xmm5, xmm0, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - vpsllw(xmm0, xmm1, 8); - vpsrlw(xmm0, 8); - vpsrlw(xmm1, 8); - - // xmm0 = rb01 - // xmm1 = ga01 - // xmm2 = c10 - // xmm3 = c11 - // xmm4 = rb00 - // xmm5 = ga00 - // xmm6 = uf - // xmm7 = vf - - // rb00 = rb00.lerp16<0>(rb01, uf); - // ga00 = ga00.lerp16<0>(ga01, uf); - - lerp16(xmm0, xmm4, xmm6, 0); - lerp16(xmm1, xmm5, xmm6, 0); - - // xmm0 = rb00 - // xmm1 = ga00 - // xmm2 = c10 - // xmm3 = c11 - // xmm6 = uf - // xmm7 = vf - - // GSVector4i rb10 = c10 & mask; - 
// GSVector4i ga10 = (c10 >> 8) & mask; - - vpsrlw(xmm5, xmm2, 8); - vpsllw(xmm2, 8); - vpsrlw(xmm4, xmm2, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - vpsrlw(xmm2, xmm3, 8); - vpsllw(xmm3, 8); - vpsrlw(xmm3, 8); - - // xmm0 = rb00 - // xmm1 = ga00 - // xmm2 = rb11 - // xmm3 = ga11 - // xmm4 = rb10 - // xmm5 = ga10 - // xmm6 = uf - // xmm7 = vf - - // rb10 = rb10.lerp16<0>(rb11, uf); - // ga10 = ga10.lerp16<0>(ga11, uf); - - lerp16(xmm2, xmm4, xmm6, 0); - lerp16(xmm3, xmm5, xmm6, 0); - - // xmm0 = rb00 - // xmm1 = ga00 - // xmm2 = rb10 - // xmm3 = ga10 - // xmm7 = vf - - // rb00 = rb00.lerp16<0>(rb10, vf); - // ga00 = ga00.lerp16<0>(ga10, vf); - - lerp16(xmm2, xmm0, xmm7, 0); - lerp16(xmm3, xmm1, xmm7, 0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - vpaddd(xmm3, xmm2); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsrlw(xmm3, xmm2, 8); - vpsllw(xmm2, 8); - vpsrlw(xmm2, 8); - } - - // xmm2 = rb - // xmm3 = ga -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) -{ - // xmm0, xmm1, xmm2, xmm3 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv, xmm0); - } - - vpminsw(uv, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - } - else - { - vpand(uv, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - - if(region) - { - vpor(uv, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - } - } - } - else - { - vmovdqa(xmm2, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - vmovdqa(xmm3, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - vmovdqa(xmm0, ptr[r12 + 
offsetof(GSScanlineGlobalData, t.mask)]); - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv, xmm2); - - if(region) - { - vpor(xmm1, xmm3); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv, xmm2); - vpminsw(uv, xmm3); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) -{ - // xmm0, xmm1, xmm2, xmm3 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - vpmaxsw(uv0, xmm0); - vpmaxsw(uv1, xmm0); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv0, xmm0); - vpmaxsw(uv1, xmm0); - } - - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - vpminsw(uv0, xmm0); - vpminsw(uv1, xmm0); - } - else - { - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - vpand(uv0, xmm0); - vpand(uv1, xmm0); - - if(region) - { - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - vpor(uv0, xmm0); - vpor(uv1, xmm0); - } - } - } - else - { - vmovdqa(xmm2, ptr[r12 + offsetof(GSScanlineGlobalData, t.min)]); - vmovdqa(xmm3, ptr[r12 + offsetof(GSScanlineGlobalData, t.max)]); - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, t.mask)]); - - // uv0 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv0, xmm2); - - if(region) - { - vpor(xmm1, xmm3); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv0, xmm2); - vpminsw(uv0, xmm3); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv0, xmm1, xmm0); - - // uv1 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv1, xmm2); - - if(region) - { - vpor(xmm1, 
xmm3); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv1, xmm2); - vpminsw(uv1, xmm3); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv1, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::SampleTextureLOD() -{ -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv) -{ -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1) -{ -} - -void GSDrawScanlineCodeGenerator::AlphaTFX() -{ - if(!m_sel.fb) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(xmm3, xmm14, 1); - - clamp16(xmm3, xmm0); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm1, xmm14, 7); - - mix16(xmm3, xmm1, xmm0); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm1, xmm14, 7); - - mix16(xmm3, xmm1, xmm0); - } - - break; - - case TFX_HIGHLIGHT: - - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); - - vpsrlw(xmm1, xmm14, 7); - - if(m_sel.tcc) - { - vpaddusb(xmm1, xmm3); - } - - mix16(xmm3, xmm1, xmm0); - - break; - - case TFX_HIGHLIGHT2: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm1, xmm14, 7); - - mix16(xmm3, xmm1, xmm0); - } - - break; - - case TFX_NONE: - - // gat = iip ? 
ga.srl16(7) : ga; - - if(m_sel.iip) - { - vpsrlw(xmm3, xmm14, 7); - } - - break; - } - - // TODO: aa1 -} - -void GSDrawScanlineCodeGenerator::ReadMask() -{ - if(m_sel.fwrite) - { - vmovdqa(xmm4, ptr[r12 + offsetof(GSScanlineGlobalData, fm)]); - } - - if(m_sel.zwrite) - { - vmovdqa(xmm5, ptr[r12 + offsetof(GSScanlineGlobalData, zm)]); - } -} - -void GSDrawScanlineCodeGenerator::TestAlpha() -{ - switch(m_sel.afail) - { - case AFAIL_FB_ONLY: - if(!m_sel.zwrite) return; - break; - - case AFAIL_ZB_ONLY: - if(!m_sel.fwrite) return; - break; - - case AFAIL_RGB_ONLY: - if(!m_sel.zwrite && m_sel.fpsm == 1) return; - break; - } - - switch(m_sel.atst) - { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - vpcmpeqd(xmm1, xmm1); - break; - - case ATST_ALWAYS: - return; - - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(xmm1, xmm3, 16); - vpcmpgtd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); - break; - - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(xmm1, xmm3, 16); - vpcmpeqd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm0); - break; - - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(xmm0, xmm3, 16); - vmovdqa(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); - vpcmpgtd(xmm1, xmm0); - break; - - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(xmm1, xmm3, 16); - vpcmpeqd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]); - break; - } - - switch(m_sel.afail) - { - case AFAIL_KEEP: - // test |= t; - vpor(xmm15, xmm1); - alltrue(); - break; - - case AFAIL_FB_ONLY: - // zm |= t; - vpor(xmm5, xmm1); - break; - - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(xmm4, xmm1); - break; - - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(xmm5, xmm1); - // fm |= t & GSVector4i::xff000000(); - vpsrld(xmm1, 24); - vpslld(xmm1, 24); - vpor(xmm4, xmm1); - break; - } -} - -void 
GSDrawScanlineCodeGenerator::ColorTFX() -{ - if(!m_sel.fwrite) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // rbt = rbt.modulate16<1>(rb).clamp8(); - - modulate16(xmm2, xmm13, 1); - - clamp16(xmm2, xmm0); - - break; - - case TFX_DECAL: - - break; - - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - - vmovdqa(xmm1, xmm3); - - modulate16(xmm3, xmm14, 1); - - vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(xmm6, 7); - - vpaddw(xmm3, xmm6); - - clamp16(xmm3, xmm0); - - mix16(xmm3, xmm1, xmm0); - - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - - modulate16(xmm2, xmm13, 1); - - vpaddw(xmm2, xmm6); - - clamp16(xmm2, xmm0); - - break; - - case TFX_NONE: - - // rbt = iip ? rb.srl16(7) : rb; - - if(m_sel.iip) - { - vpsrlw(xmm2, xmm13, 7); - } - - break; - } -} - -void GSDrawScanlineCodeGenerator::Fog() -{ - if(!m_sel.fwrite || !m_sel.fge) - { - return; - } - - // rb = m_local.gd->frb.lerp16<0>(rb, f); - // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - - vmovdqa(xmm0, ptr[r12 + offsetof(GSScanlineGlobalData, frb)]); - vmovdqa(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, fga)]); - - vmovdqa(xmm6, xmm3); - - lerp16(xmm2, xmm0, xmm9, 0); - lerp16(xmm3, xmm1, xmm9, 0); - - mix16(xmm3, xmm6, xmm9); -} - -void GSDrawScanlineCodeGenerator::ReadFrame() -{ - if(!m_sel.fb) - { - return; - } - - // int fa = fza_base.x + fza_offset->x; - - mov(ebx, dword[rsi]); - add(ebx, dword[rdi]); - movsxd(rbx, ebx); - - if(!m_sel.rfb) - { - return; - } - - ReadPixel(xmm6, rbx); -} - -void GSDrawScanlineCodeGenerator::TestDestAlpha() -{ - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) - { - return; - } - - // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - - if(m_sel.datm) - { - if(m_sel.fpsm == 2) - { - vpxor(xmm0, xmm0); - //vpsrld(xmm1, xmm6, 15); - vpslld(xmm1, xmm6, 16); - vpsrad(xmm1, 31); - vpcmpeqd(xmm1, xmm0); - } - else 
- { - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm6, xmm0); - vpsrad(xmm1, 31); - } - } - else - { - if(m_sel.fpsm == 2) - { - vpslld(xmm1, xmm6, 16); - vpsrad(xmm1, 31); - } - else - { - vpsrad(xmm1, xmm6, 31); - } - } - - vpor(xmm15, xmm1); - - alltrue(); -} - -void GSDrawScanlineCodeGenerator::WriteMask() -{ - // fm |= test; - // zm |= test; - - if(m_sel.fwrite) - { - vpor(xmm4, xmm15); - } - - if(m_sel.zwrite) - { - vpor(xmm5, xmm15); - } - - // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); - - vpcmpeqd(xmm1, xmm1); - - if(m_sel.fwrite && m_sel.zwrite) - { - vpcmpeqd(xmm0, xmm1, xmm5); - vpcmpeqd(xmm1, xmm4); - vpackssdw(xmm1, xmm0); - } - else if(m_sel.fwrite) - { - vpcmpeqd(xmm1, xmm4); - vpackssdw(xmm1, xmm1); - } - else if(m_sel.zwrite) - { - vpcmpeqd(xmm1, xmm5); - vpackssdw(xmm1, xmm1); - } - - vpmovmskb(edx, xmm1); - - not(edx); -} - -void GSDrawScanlineCodeGenerator::WriteZBuf() -{ - if(!m_sel.zwrite) - { - return; - } - - bool fast = m_sel.ztest && m_sel.zpsm < 2; - - vmovdqa(xmm1, ptr[r11 + offsetof(GSScanlineLocalData, temp.zs)]); - - if(fast) - { - // zs = zs.blend8(zd, zm); - - vpblendvb(xmm1, ptr[r11 + offsetof(GSScanlineLocalData, temp.zd)], xmm4); - } - - WritePixel(xmm1, rbp, dh, fast, m_sel.zpsm, 1); -} - -void GSDrawScanlineCodeGenerator::AlphaBlend() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.abe == 0 && m_sel.aa1 == 0) - { - return; - } - - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) - { - switch(m_sel.fpsm) - { - case 0: - case 1: - - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; - - vpsllw(xmm0, xmm6, 8); - vpsrlw(xmm0, 8); - vpsrlw(xmm1, xmm6, 8); - - break; - - case 2: - - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - - vpcmpeqd(xmm15, xmm15); - - vpsrld(xmm15, 27); // 0x0000001f - vpand(xmm0, xmm6, xmm15); - vpslld(xmm0, 3); - - vpslld(xmm15, 10); // 
0x00007c00 - vpand(xmm5, xmm6, xmm15); - vpslld(xmm5, 9); - - vpor(xmm0, xmm1); - - vpsrld(xmm15, 5); // 0x000003e0 - vpand(xmm1, xmm6, xmm15); - vpsrld(xmm1, 2); - - vpsllw(xmm15, 10); // 0x00008000 - vpand(xmm5, xmm6, xmm15); - vpslld(xmm5, 8); - - vpor(xmm1, xmm5); - - break; - } - } - - // xmm2, xmm3 = src rb, ga - // xmm0, xmm1 = dst rb, ga - // xmm5, xmm15 = free - - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) - { - vmovdqa(xmm5, xmm2); - } - - if(m_sel.aba != m_sel.abb) - { - // rb = c[aba * 2 + 0]; - - switch(m_sel.aba) - { - case 0: break; - case 1: vmovdqa(xmm2, xmm0); break; - case 2: vpxor(xmm2, xmm2); break; - } - - // rb = rb.sub16(c[abb * 2 + 0]); - - switch(m_sel.abb) - { - case 0: vpsubw(xmm2, xmm5); break; - case 1: vpsubw(xmm2, xmm0); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - - switch(m_sel.abc) - { - case 0: - case 1: - vpshuflw(xmm15, m_sel.abc ? 
xmm1 : xmm3, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm15, xmm15, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(xmm15, 7); - break; - case 2: - vmovdqa(xmm15, ptr[r12 + offsetof(GSScanlineGlobalData, afix)]); - break; - } - - // rb = rb.modulate16<1>(a); - - modulate16(xmm2, xmm15, 1); - } - - // rb = rb.add16(c[abd * 2 + 0]); - - switch(m_sel.abd) - { - case 0: vpaddw(xmm2, xmm5); break; - case 1: vpaddw(xmm2, xmm0); break; - case 2: break; - } - } - else - { - // rb = c[abd * 2 + 0]; - - switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(xmm2, xmm0); break; - case 2: vpxor(xmm2, xmm2); break; - } - } - - if(m_sel.pabe) - { - // mask = (c[1] << 8).sra32(31); - - vpslld(xmm0, xmm3, 8); - vpsrad(xmm0, 31); - - // rb = c[0].blend8(rb, mask); - - vpblendvb(xmm2, xmm5, xmm2, xmm0); - } - - // xmm0 = pabe mask - // xmm3 = src ga - // xmm1 = dst ga - // xmm2 = rb - // xmm15 = a - // xmm5 = free - - vmovdqa(xmm5, xmm3); - - if(m_sel.aba != m_sel.abb) - { - // ga = c[aba * 2 + 1]; - - switch(m_sel.aba) - { - case 0: break; - case 1: vmovdqa(xmm3, xmm1); break; - case 2: vpxor(xmm3, xmm3); break; - } - - // ga = ga.sub16(c[abeb * 2 + 1]); - - switch(m_sel.abb) - { - case 0: vpsubw(xmm3, xmm5); break; - case 1: vpsubw(xmm3, xmm1); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // ga = ga.modulate16<1>(a); - - modulate16(xmm3, xmm15, 1); - } - - // ga = ga.add16(c[abd * 2 + 1]); - - switch(m_sel.abd) - { - case 0: vpaddw(xmm3, xmm5); break; - case 1: vpaddw(xmm3, xmm1); break; - case 2: break; - } - } - else - { - // ga = c[abd * 2 + 1]; - - switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(xmm3, xmm1); break; - case 2: vpxor(xmm3, xmm3); break; - } - } - - // xmm0 = pabe mask - // xmm5 = src ga - // xmm2 = rb - // xmm3 = ga - // xmm1, xmm15 = free - - if(m_sel.pabe) - { - vpsrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) - - // ga = c[1].blend8(ga, mask).mix16(c[1]); - - vpblendvb(xmm3, xmm5, 
xmm3, xmm0); - } - else - { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx - { - mix16(xmm3, xmm5, xmm15); - } - } -} - -void GSDrawScanlineCodeGenerator::WriteFrame() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.colclamp == 0) - { - // c[0] &= 0x00ff00ff; - // c[1] &= 0x00ff00ff; - - vpcmpeqd(xmm15, xmm15); - vpsrlw(xmm15, 8); - vpand(xmm2, xmm15); - vpand(xmm3, xmm15); - } - - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(rax, r8); - and(rax, 3); - shl(rax, 5); - vpaddw(xmm2, ptr[r12 + rax + offsetof(GSScanlineGlobalData, dimx) + sizeof(GSVector4i) * 0]); - vpaddw(xmm3, ptr[r12 + rax + offsetof(GSScanlineGlobalData, dimx) + sizeof(GSVector4i) * 1]); - } - - // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); - - vpunpckhwd(xmm15, xmm2, xmm3); - vpunpcklwd(xmm2, xmm3); - vpackuswb(xmm2, xmm15); - - if(m_sel.fba && m_sel.fpsm != 1) - { - // fs |= 0x80000000; - - vpcmpeqd(xmm15, xmm15); - vpslld(xmm15, 31); - vpor(xmm2, xmm15); - } - - // xmm2 = fs - // xmm4 = fm - // xmm6 = fd - - if(m_sel.fpsm == 2) - { - // GSVector4i rb = fs & 0x00f800f8; - // GSVector4i ga = fs & 0x8000f800; - - mov(eax, 0x00f800f8); - vmovd(xmm0, eax); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - - mov(eax, 0x8000f800); - vmovd(xmm1, eax); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); - - vpand(xmm0, xmm2); - vpand(xmm1, xmm2); - - // fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); - - vpsrld(xmm2, xmm0, 9); - vpsrld(xmm0, 3); - vpsrld(xmm3, xmm1, 16); - vpsrld(xmm1, 6); - - vpor(xmm0, xmm1); - vpor(xmm2, xmm3); - vpor(xmm2, xmm0); - } - - if(m_sel.rfb) - { - // fs = fs.blend(fd, fm); - - blend(xmm2, xmm6, xmm4); // TODO: could be skipped in certain cases, depending on fpsm and fm - } - - bool fast = m_sel.rfb && m_sel.fpsm < 2; - - WritePixel(xmm2, rbx, dl, fast, m_sel.fpsm, 0); -} - -void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg64& addr) -{ - vmovq(dst, qword[r13 + addr * 2]); - vmovhps(dst, qword[r13 + addr * 2 + 8 * 2]); -} - -void 
GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz) -{ - if(fast) - { - // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); - // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); - - test(mask, 0x0f); - je("@f"); - vmovq(qword[r13 + addr * 2], src); - L("@@"); - - test(mask, 0xf0); - je("@f"); - vmovhps(qword[r13 + addr * 2 + 8 * 2], src); - L("@@"); - - // vmaskmovps? - } - else - { - // if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>()); - // if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>()); - // if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>()); - // if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>()); - - test(mask, 0x03); - je("@f"); - WritePixel(src, addr, 0, psm); - L("@@"); - - test(mask, 0x0c); - je("@f"); - WritePixel(src, addr, 1, psm); - L("@@"); - - test(mask, 0x30); - je("@f"); - WritePixel(src, addr, 2, psm); - L("@@"); - - test(mask, 0xc0); - je("@f"); - WritePixel(src, addr, 3, psm); - L("@@"); - } -} - -static const int s_offsets[4] = {0, 2, 8, 10}; - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, uint8 i, int psm) -{ - Address dst = ptr[r13 + addr * 2 + s_offsets[i] * 2]; - - switch(psm) - { - case 0: - if(i == 0) vmovd(dst, src); - else vpextrd(dst, src, i); - break; - case 1: - if(i == 0) vmovd(eax, src); - else vpextrd(eax, src, i); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - vpextrw(eax, src, i * 2); - mov(dst, ax); - break; - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) -{ - // TODO -} - -void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) -{ - const Address& src = m_sel.tlu ? 
ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4]; - - if(i == 0) vmovd(eax, addr); - else vpextrd(eax, addr, i); - - if(m_sel.tlu) movzx(rax, byte[rbx + rax]); - - if(i == 0) vmovd(dst, src); - else vpinsrd(dst, src, i); -} - -#endif diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.cpp deleted file mode 100644 index 40631c1c9e..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x64.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" - -#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -void GSDrawScanlineCodeGenerator::Generate() -{ -} - -void GSDrawScanlineCodeGenerator::Init() -{ -} - -void GSDrawScanlineCodeGenerator::Step() -{ -} - -void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) -{ -} - -void GSDrawScanlineCodeGenerator::SampleTexture() -{ -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) -{ -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) -{ -} - -void GSDrawScanlineCodeGenerator::AlphaTFX() -{ -} - -void GSDrawScanlineCodeGenerator::ReadMask() -{ -} - -void GSDrawScanlineCodeGenerator::TestAlpha() -{ -} - -void GSDrawScanlineCodeGenerator::ColorTFX() -{ -} - -void GSDrawScanlineCodeGenerator::Fog() -{ -} - -void GSDrawScanlineCodeGenerator::ReadFrame() -{ -} - -void GSDrawScanlineCodeGenerator::TestDestAlpha() -{ -} - -void GSDrawScanlineCodeGenerator::WriteMask() -{ -} - -void GSDrawScanlineCodeGenerator::WriteZBuf() -{ -} - -void GSDrawScanlineCodeGenerator::AlphaBlend() -{ -} - -void GSDrawScanlineCodeGenerator::WriteFrame() -{ -} - -void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg64& addr) -{ -} - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz) -{ -} - -static const int s_offsets[4] = {0, 2, 8, 10}; - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, uint8 i, int psm) -{ -} - -void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) -{ -} - -void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) -{ -} - -#endif diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx.cpp deleted file mode 100644 index 9f7b7ef74c..0000000000 --- 
a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ /dev/null @@ -1,2921 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE == 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 16; -static const int _top = _args + 4; -static const int _v = _args + 8; - -void GSDrawScanlineCodeGenerator::Generate() -{ -//ret(8); - push(ebx); - push(esi); - push(edi); - push(ebp); - - Init(); - - if(!m_sel.edge) - { - align(16); - } - -L("loop"); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // xmm0 = z/zi - // xmm2 = s/u (tme) - // xmm3 = t/v (tme) - // xmm4 = q (tme) - // xmm5 = rb (!tme) - // xmm6 = ga (!tme) - // xmm7 = test - - bool tme = m_sel.tfx != TFX_NONE; - - TestZ(tme ? xmm5 : xmm2, tme ? 
xmm6 : xmm3); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - xmm0 - // xmm2 = s/u (tme) - // xmm3 = t/v (tme) - // xmm4 = q (tme) - // xmm5 = rb (!tme) - // xmm6 = ga (!tme) - // xmm7 = test - - if(m_sel.mmin) - { - SampleTextureLOD(); - } - else - { - SampleTexture(); - } - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - xmm2 - // - xmm3 - // - xmm4 - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - AlphaTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ReadMask(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - TestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ColorTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - Fog(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ReadFrame(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - TestDestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - WriteMask(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - - WriteZBuf(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - 
// edi = fzbc - // - ebp - // xmm2 = fd - // xmm3 = fm - // - xmm4 - // xmm5 = rb - // xmm6 = ga - - AlphaBlend(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // xmm2 = fd - // xmm3 = fm - // xmm5 = rb - // xmm6 = ga - - WriteFrame(); - -L("step"); - - // if(steps <= 0) break; - - if(!m_sel.edge) - { - test(ecx, ecx); - - jle("exit", T_NEAR); - - Step(); - - jmp("loop", T_NEAR); - } - -L("exit"); - - // vzeroupper(); - - pop(ebp); - pop(edi); - pop(esi); - pop(ebx); - - ret(8); -} - -void GSDrawScanlineCodeGenerator::Init() -{ - if(!m_sel.notest) - { - // int skip = left & 3; - - mov(ebx, edx); - and(edx, 3); - - // int steps = pixels + skip - 4; - - lea(ecx, ptr[ecx + edx - 4]); - - // left -= skip; - - sub(ebx, edx); - - // GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))]; - - shl(edx, 4); - - vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]); - - mov(eax, ecx); - sar(eax, 31); - and(eax, ecx); - shl(eax, 4); - - vpor(xmm7, ptr[eax + (size_t)&m_test[7]]); - } - else - { - mov(ebx, edx); // left - xor(edx, edx); // skip - lea(ecx, ptr[ecx - 4]); // steps - } - - // GSVector2i* fza_base = &m_local.gd->fzbr[top]; - - mov(esi, ptr[esp + _top]); - lea(esi, ptr[esi * 8]); - add(esi, ptr[&m_local.gd->fzbr]); - - // GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2]; - - lea(edi, ptr[ebx * 2]); - add(edi, ptr[&m_local.gd->fzbc]); - - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) - { - // edx = &m_local.d[skip] - - lea(edx, ptr[edx * 8 + (size_t)m_local.d]); - - // ebx = &v - - mov(ebx, ptr[esp + _v]); - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) - { - vmovaps(xmm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - - if(m_sel.fwrite && m_sel.fge) - { - // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); - - vcvttps2dq(xmm1, xmm0); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 
2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpaddw(xmm1, ptr[edx + offsetof(GSScanlineLocalData::skip, f)]); - - vmovdqa(ptr[&m_local.temp.f], xmm1); - } - - if(m_sel.zb) - { - // z = vp.zzzz() + m_local.d[skip].z; - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - vmovaps(ptr[&m_local.temp.z], xmm0); - vmovaps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]); - vmovaps(ptr[&m_local.temp.zo], xmm2); - vaddps(xmm0, xmm2); - } - } - } - else - { - if(m_sel.ztest) - { - vmovdqa(xmm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.edge || m_sel.tfx != TFX_NONE) - { - vmovaps(xmm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t - } - - if(m_sel.edge) - { - // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); - - vpshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3)); - vpsrlw(xmm3, 9); - - vmovdqa(ptr[&m_local.temp.cov], xmm3); - } - - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i vti(vt); - - vcvttps2dq(xmm6, xmm4); - - // s = vti.xxxx() + m_local.d[skip].s; - // t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t; - - vpshufd(xmm2, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm3, xmm6, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - } - else - { - if(m_sel.ltf) - { - vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm6, 12); - vmovdqa(ptr[&m_local.temp.vf], xmm6); - } - } - - vmovdqa(ptr[&m_local.temp.s], xmm2); - vmovdqa(ptr[&m_local.temp.t], xmm3); - } - else - { - // s = vt.xxxx() + m_local.d[skip].s; - // t = vt.yyyy() + m_local.d[skip].t; - // q = vt.zzzz() + m_local.d[skip].q; - - vshufps(xmm2, xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm4, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm4, xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 
2)); - - vaddps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - vaddps(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - vaddps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]); - - vmovaps(ptr[&m_local.temp.s], xmm2); - vmovaps(ptr[&m_local.temp.t], xmm3); - vmovaps(ptr[&m_local.temp.q], xmm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i vc = GSVector4i(v.c); - - vcvttps2dq(xmm6, ptr[ebx + offsetof(GSVertexSW, c)]); // v.c - - // vc = vc.upl16(vc.zwxy()); - - vpshufd(xmm5, xmm6, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm6, xmm5); - - // rb = vc.xxxx().add16(m_local.d[skip].rb); - // ga = vc.zzzz().add16(m_local.d[skip].ga); - - vpshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); - - vpaddw(xmm5, ptr[edx + offsetof(GSScanlineLocalData::skip, rb)]); - vpaddw(xmm6, ptr[edx + offsetof(GSScanlineLocalData::skip, ga)]); - - vmovdqa(ptr[&m_local.temp.rb], xmm5); - vmovdqa(ptr[&m_local.temp.ga], xmm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - vmovdqa(xmm5, ptr[&m_local.c.rb]); - vmovdqa(xmm6, ptr[&m_local.c.ga]); - } - } - } - } -} - -void GSDrawScanlineCodeGenerator::Step() -{ - // steps -= 4; - - sub(ecx, 4); - - // fza_offset++; - - add(edi, 8); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // z += m_local.d4.z; - - if(m_sel.zb) - { - vmovaps(xmm0, ptr[&m_local.temp.zo]); - vaddps(xmm0, ptr[&m_local.d4.z]); - vmovaps(ptr[&m_local.temp.zo], xmm0); - vaddps(xmm0, ptr[&m_local.temp.z]); - } - - // f = f.add16(m_local.d4.f); - - if(m_sel.fwrite && m_sel.fge) - { - vmovdqa(xmm1, ptr[&m_local.temp.f]); - vpaddw(xmm1, ptr[&m_local.d4.f]); - vmovdqa(ptr[&m_local.temp.f], xmm1); - } - } - else - { - if(m_sel.ztest) - { - vmovdqa(xmm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i stq = m_local.d4.stq; - - // s += stq.xxxx(); - // if(!sprite) t += stq.yyyy(); - - vmovdqa(xmm4, 
ptr[&m_local.d4.stq]); - - vpshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - vpaddd(xmm2, ptr[&m_local.temp.s]); - vmovdqa(ptr[&m_local.temp.s], xmm2); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - vpaddd(xmm3, ptr[&m_local.temp.t]); - vmovdqa(ptr[&m_local.temp.t], xmm3); - } - else - { - vmovdqa(xmm3, ptr[&m_local.temp.t]); - } - } - else - { - // GSVector4 stq = m_local.d4.stq; - - // s += stq.xxxx(); - // t += stq.yyyy(); - // q += stq.zzzz(); - - vmovaps(xmm4, ptr[&m_local.d4.stq]); - - vshufps(xmm2, xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm4, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm4, xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - - vaddps(xmm2, ptr[&m_local.temp.s]); - vaddps(xmm3, ptr[&m_local.temp.t]); - vaddps(xmm4, ptr[&m_local.temp.q]); - - vmovaps(ptr[&m_local.temp.s], xmm2); - vmovaps(ptr[&m_local.temp.t], xmm3); - vmovaps(ptr[&m_local.temp.q], xmm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i c = m_local.d4.c; - - // rb = rb.add16(c.xxxx()); - // ga = ga.add16(c.yyyy()); - - vmovdqa(xmm7, ptr[&m_local.d4.c]); - - vpshufd(xmm5, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm6, xmm7, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddw(xmm5, ptr[&m_local.temp.rb]); - vpaddw(xmm6, ptr[&m_local.temp.ga]); - - // FIXME: color may underflow and roll over at the end of the line, if decreasing - - vpxor(xmm7, xmm7); - vpmaxsw(xmm5, xmm7); - vpmaxsw(xmm6, xmm7); - - vmovdqa(ptr[&m_local.temp.rb], xmm5); - vmovdqa(ptr[&m_local.temp.ga], xmm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - vmovdqa(xmm5, ptr[&m_local.c.rb]); - vmovdqa(xmm6, ptr[&m_local.c.ga]); - } - } - } - } - - if(!m_sel.notest) - { - // test = m_test[7 + (steps & (steps >> 31))]; - - mov(edx, ecx); - sar(edx, 31); - and(edx, ecx); - shl(edx, 4); - - vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); - } -} - -void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) -{ - 
if(!m_sel.zb) - { - return; - } - - // int za = fza_base.y + fza_offset->y; - - mov(ebp, ptr[esi + 4]); - add(ebp, ptr[edi + 4]); - - // GSVector4i zs = zi; - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.zoverflow) - { - // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - - vbroadcastss(temp1, ptr[&GSVector4::m_half]); - vmulps(temp1, xmm0); - vcvttps2dq(temp1, temp1); - vpslld(temp1, 1); - - vcvttps2dq(xmm0, xmm0); - vpcmpeqd(temp2, temp2); - vpsrld(temp2, 31); - vpand(xmm0, temp2); - - vpor(xmm0, temp1); - } - else - { - // zs = GSVector4i(z); - - vcvttps2dq(xmm0, xmm0); - } - - if(m_sel.zwrite) - { - vmovdqa(ptr[&m_local.temp.zs], xmm0); - } - } - - if(m_sel.ztest) - { - ReadPixel(xmm1, ebp); - - if(m_sel.zwrite && m_sel.zpsm < 2) - { - vmovdqa(ptr[&m_local.temp.zd], xmm1); - } - - // zd &= 0xffffffff >> m_sel.zpsm * 8; - - if(m_sel.zpsm) - { - vpslld(xmm1, m_sel.zpsm * 8); - vpsrld(xmm1, m_sel.zpsm * 8); - } - - if(m_sel.zoverflow || m_sel.zpsm == 0) - { - // GSVector4i o = GSVector4i::x80000000(); - - vpcmpeqd(temp1, temp1); - vpslld(temp1, 31); - - // GSVector4i zso = zs - o; - // GSVector4i zdo = zd - o; - - vpsubd(xmm0, temp1); - vpsubd(xmm1, temp1); - } - - switch(m_sel.ztst) - { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(xmm1, xmm0); - vpor(xmm7, xmm1); - break; - - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(xmm0, xmm1); - vpcmpeqd(temp1, temp1); - vpxor(xmm0, temp1); - vpor(xmm7, xmm0); - break; - } - - alltrue(); - } -} - -void GSDrawScanlineCodeGenerator::SampleTexture() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - mov(ebx, ptr[&m_local.gd->tex[0]]); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - // ebx = tex - // edx = clut - - if(!m_sel.fst) - { - vrcpps(xmm0, xmm4); - - vmulps(xmm2, xmm0); - vmulps(xmm3, xmm0); - - 
vcvttps2dq(xmm2, xmm2); - vcvttps2dq(xmm3, xmm3); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm4, eax); - vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - vpsubd(xmm2, xmm4); - vpsubd(xmm3, xmm4); - } - } - - // xmm2 = u - // xmm3 = v - - if(m_sel.ltf) - { - // GSVector4i uf = u.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - vmovdqa(ptr[&m_local.temp.uf], xmm0); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4i vf = v.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - vmovdqa(ptr[&m_local.temp.vf], xmm0); - } - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(xmm2, 16); - vpsrad(xmm3, 16); - vpackssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - vpcmpeqd(xmm1, xmm1); - vpsrlw(xmm1, 15); - vpaddw(xmm3, xmm2, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - Wrap(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - Wrap(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i y0 = uv0.uph16() << tw; - // GSVector4i x0 = uv0.upl16(); - - vpxor(xmm0, xmm0); - - vpunpcklwd(xmm4, xmm2, xmm0); - vpunpckhwd(xmm2, xmm2, xmm0); - vpslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i y1 = uv1.uph16() << tw; - // GSVector4i x1 = uv1.upl16(); - - vpunpcklwd(xmm6, xmm3, xmm0); - vpunpckhwd(xmm3, xmm3, xmm0); - vpslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; 
- - vpaddd(xmm5, xmm2, xmm4); - vpaddd(xmm2, xmm2, xmm6); - vpaddd(xmm0, xmm3, xmm4); - vpaddd(xmm3, xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - vmovdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - vpsllw(xmm2, xmm6, 8); - vpsrlw(xmm2, 8); - vpsrlw(xmm6, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - vpsllw(xmm3, xmm4, 8); - vpsrlw(xmm3, 8); - vpsrlw(xmm4, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - // xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - vpsrlw(xmm2, xmm1, 8); - vpsllw(xmm1, 8); - vpsrlw(xmm1, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - vpsrlw(xmm6, xmm5, 8); - vpsllw(xmm5, 8); - vpsrlw(xmm5, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // 
xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - vpaddd(xmm5, xmm2, xmm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(xmm5, xmm6, 8); - vpsrlw(xmm5, 8); - vpsrlw(xmm6, 8); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) -{ - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv, ptr[&m_local.gd->t.min]); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv, xmm0); - } - - vpminsw(uv, ptr[&m_local.gd->t.max]); - } - else - { - vpand(uv, ptr[&m_local.gd->t.min]); - - if(region) - { - vpor(uv, ptr[&m_local.gd->t.max]); - } - } - } - else - { - vmovdqa(xmm4, ptr[&m_local.gd->t.min]); - vmovdqa(xmm5, ptr[&m_local.gd->t.max]); - vmovdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv, xmm4); - - if(region) - { - vpor(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv, xmm4); - vpminsw(uv, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) -{ - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vmovdqa(xmm4, 
ptr[&m_local.gd->t.min]); - vpmaxsw(uv0, xmm4); - vpmaxsw(uv1, xmm4); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv0, xmm0); - vpmaxsw(uv1, xmm0); - } - - vmovdqa(xmm5, ptr[&m_local.gd->t.max]); - vpminsw(uv0, xmm5); - vpminsw(uv1, xmm5); - } - else - { - vmovdqa(xmm4, ptr[&m_local.gd->t.min]); - vpand(uv0, xmm4); - vpand(uv1, xmm4); - - if(region) - { - vmovdqa(xmm5, ptr[&m_local.gd->t.max]); - vpor(uv0, xmm5); - vpor(uv1, xmm5); - } - } - } - else - { - vmovdqa(xmm4, ptr[&m_local.gd->t.min]); - vmovdqa(xmm5, ptr[&m_local.gd->t.max]); - vmovdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // uv0 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv0, xmm4); - - if(region) - { - vpor(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv0, xmm4); - vpminsw(uv0, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv0, xmm1, xmm0); - - // uv1 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv1, xmm4); - - if(region) - { - vpor(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv1, xmm4); - vpminsw(uv1, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv1, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::SampleTextureLOD() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - push(ebp); - - mov(ebp, (size_t)m_local.gd->tex); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - if(!m_sel.fst) - { - vrcpps(xmm0, xmm4); - - vmulps(xmm2, xmm0); - vmulps(xmm3, xmm0); - - vcvttps2dq(xmm2, xmm2); - vcvttps2dq(xmm3, xmm3); - } - - // xmm2 = u - // xmm3 = v - // xmm4 = q - // xmm0 = xmm1 = xmm5 = xmm6 = free - - // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) 
- - if(!m_sel.lcm) - { - // lod = -log2(Q) * (1 << L) + K - - vpcmpeqd(xmm1, xmm1); - vpsrld(xmm1, xmm1, 25); - vpslld(xmm0, xmm4, 1); - vpsrld(xmm0, xmm0, 24); - vpsubd(xmm0, xmm1); - vcvtdq2ps(xmm0, xmm0); - - // xmm0 = (float)(exp(q) - 127) - - vpslld(xmm4, xmm4, 9); - vpsrld(xmm4, xmm4, 9); - vorps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - - // xmm4 = mant(q) | 1.0f - - if(m_cpu.has(util::Cpu::tFMA)) - { - vmovaps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0 - vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * xmm4 + c1 - vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * xmm4 + c1) * xmm4 + c2 - vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // xmm4 - 1.0f - vfmadd213ps(xmm4, xmm5, xmm0); // ((c0 * xmm4 + c1) * xmm4 + c2) * (xmm4 - 1.0f) + xmm0 - } - else - { - vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); - vmulps(xmm5, xmm4); - vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); - vmulps(xmm4, xmm5); - vaddps(xmm4, xmm0); - } - - // xmm4 = log2(Q) = ((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0 - - if(m_cpu.has(util::Cpu::tFMA)) - { - vmovaps(xmm5, ptr[&m_local.gd->l]); - vfmadd213ps(xmm4, xmm5, ptr[&m_local.gd->k]); - } - else - { - vmulps(xmm4, ptr[&m_local.gd->l]); - vaddps(xmm4, ptr[&m_local.gd->k]); - } - - // xmm4 = (-log2(Q) * (1 << L) + K) * 0x10000 - - vxorps(xmm0, xmm0); - vminps(xmm4, ptr[&m_local.gd->mxl]); - vmaxps(xmm4, xmm0); - vcvtps2dq(xmm4, xmm4); - - if(m_sel.mmin == 1) // round-off mode - { - mov(eax, 0x8000); - vmovd(xmm0, eax); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpaddd(xmm4, xmm0); - } - - vpsrld(xmm0, xmm4, 16); - - vmovdqa(ptr[&m_local.temp.lod.i], xmm0); -/* -vpslld(xmm5, xmm0, 6); -vpslld(xmm6, xmm4, 16); 
-vpsrld(xmm6, xmm6, 24); -return; -*/ - if(m_sel.mmin == 2) // trilinear mode - { - vpshuflw(xmm1, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.temp.lod.f], xmm1); - } - - // shift u/v/minmax by (int)lod - - if(m_cpu.has(util::Cpu::tAVX2)) - { - vpsravd(xmm2, xmm2, xmm0); - vpsravd(xmm3, xmm3, xmm0); - - vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); - vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); - - // m_local.gd->t.minmax => m_local.temp.uv_minmax[0/1] - - vpxor(xmm1, xmm1); - - vmovdqa(xmm4, ptr[&m_local.gd->t.min]); - vpunpcklwd(xmm5, xmm4, xmm1); // minu - vpunpckhwd(xmm6, xmm4, xmm1); // minv - vpsrlvd(xmm5, xmm5, xmm0); - vpsrlvd(xmm6, xmm6, xmm0); - vpackusdw(xmm5, xmm6); - - vmovdqa(xmm4, ptr[&m_local.gd->t.max]); - vpunpcklwd(xmm6, xmm4, xmm1); // maxu - vpunpckhwd(xmm4, xmm4, xmm1); // maxv - vpsrlvd(xmm6, xmm6, xmm0); - vpsrlvd(xmm4, xmm4, xmm0); - vpackusdw(xmm6, xmm4); - - vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5); - vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6); - } - else - { - vmovq(xmm4, ptr[&m_local.gd->t.minmax]); - - vpunpckldq(xmm5, xmm2, xmm3); - vpunpckhdq(xmm6, xmm2, xmm3); - vmovdqa(xmm2, xmm5); - vmovdqa(xmm3, xmm6); - - vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); - vpsrad(xmm2, xmm0); - vpsrlw(xmm1, xmm4, xmm0); - vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1); - - vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]); - vpsrad(xmm5, xmm0); - vpsrlw(xmm1, xmm4, xmm0); - vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1); - - vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]); - vpsrad(xmm3, xmm0); - vpsrlw(xmm1, xmm4, xmm0); - vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1); - - vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]); - vpsrad(xmm6, xmm0); - vpsrlw(xmm1, xmm4, xmm0); - vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1); - - vpunpckldq(xmm2, xmm3); - vpunpckhdq(xmm5, xmm6); - vpunpckhdq(xmm3, xmm2, xmm5); - vpunpckldq(xmm2, xmm5); - - vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); - 
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); - - vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - - vpunpcklwd(xmm0, xmm5, xmm6); - vpunpckhwd(xmm1, xmm5, xmm6); - vpunpckldq(xmm5, xmm0, xmm1); - vpunpckhdq(xmm6, xmm0, xmm1); - - vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5); - vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6); - } - } - else - { - // lod = K - - vmovd(xmm0, ptr[&m_local.gd->lod.i.u32[0]]); - - vpsrad(xmm2, xmm0); - vpsrad(xmm3, xmm0); - - vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); - vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); - - vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - } - - // xmm2 = m_local.temp.uv[0] = u (level m) - // xmm3 = m_local.temp.uv[1] = v (level m) - // xmm5 = minuv - // xmm6 = maxuv - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm4, eax); - vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - vpsubd(xmm2, xmm4); - vpsubd(xmm3, xmm4); - - // GSVector4i uf = u.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - vmovdqa(ptr[&m_local.temp.uf], xmm0); - - // GSVector4i vf = v.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - vmovdqa(ptr[&m_local.temp.vf], xmm0); - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(xmm2, 16); - vpsrad(xmm3, 16); - vpackssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - vpcmpeqd(xmm1, xmm1); - vpsrlw(xmm1, 15); - vpaddw(xmm3, xmm2, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i x0 = uv0.upl16(); - // GSVector4i y0 = uv0.uph16() 
<< tw; - - vpxor(xmm0, xmm0); - - vpunpcklwd(xmm4, xmm2, xmm0); - vpunpckhwd(xmm2, xmm2, xmm0); - vpslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i x1 = uv1.upl16(); - // GSVector4i y1 = uv1.uph16() << tw; - - vpunpcklwd(xmm6, xmm3, xmm0); - vpunpckhwd(xmm3, xmm3, xmm0); - vpslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - vpaddd(xmm5, xmm2, xmm4); - vpaddd(xmm2, xmm2, xmm6); - vpaddd(xmm0, xmm3, xmm4); - vpaddd(xmm3, xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - vmovdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - vpsllw(xmm2, xmm6, 8); - vpsrlw(xmm2, 8); - vpsrlw(xmm6, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - vpsllw(xmm3, xmm4, 8); - vpsrlw(xmm3, 8); - vpsrlw(xmm4, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - 
// xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - vpsrlw(xmm2, xmm1, 8); - vpsllw(xmm1, 8); - vpsrlw(xmm1, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - vpsrlw(xmm6, xmm5, 8); - vpsllw(xmm5, 8); - vpsrlw(xmm5, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - vpaddd(xmm5, xmm2, xmm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(xmm5, xmm6, 8); - vpsrlw(xmm5, 8); - vpsrlw(xmm6, 8); - } - - if(m_sel.mmin != 1) // !round-off mode - { - vmovdqa(ptr[&m_local.temp.trb], xmm5); - vmovdqa(ptr[&m_local.temp.tga], xmm6); - - vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]); - vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]); - - vpsrad(xmm2, 1); - vpsrad(xmm3, 1); - - vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - - vpsrlw(xmm5, 1); - vpsrlw(xmm6, 1); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm4, eax); - vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - vpsubd(xmm2, xmm4); - vpsubd(xmm3, xmm4); - - // GSVector4i uf = u.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - 
vmovdqa(ptr[&m_local.temp.uf], xmm0); - - // GSVector4i vf = v.xxzzlh().srl16(1); - - vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 12); - vmovdqa(ptr[&m_local.temp.vf], xmm0); - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(xmm2, 16); - vpsrad(xmm3, 16); - vpackssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - vpcmpeqd(xmm1, xmm1); - vpsrlw(xmm1, 15); - vpaddw(xmm3, xmm2, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i x0 = uv0.upl16(); - // GSVector4i y0 = uv0.uph16() << tw; - - vpxor(xmm0, xmm0); - - vpunpcklwd(xmm4, xmm2, xmm0); - vpunpckhwd(xmm2, xmm2, xmm0); - vpslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i x1 = uv1.upl16(); - // GSVector4i y1 = uv1.uph16() << tw; - - vpunpcklwd(xmm6, xmm3, xmm0); - vpunpckhwd(xmm3, xmm3, xmm0); - vpslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - vpaddd(xmm5, xmm2, xmm4); - vpaddd(xmm2, xmm2, xmm6); - vpaddd(xmm0, xmm3, xmm4); - vpaddd(xmm3, xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - 
ReadTexel(4, 1); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - vmovdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - vpsllw(xmm2, xmm6, 8); - vpsrlw(xmm2, 8); - vpsrlw(xmm6, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - vpsllw(xmm3, xmm4, 8); - vpsrlw(xmm3, 8); - vpsrlw(xmm4, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - // xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - vpsrlw(xmm2, xmm1, 8); - vpsllw(xmm1, 8); - vpsrlw(xmm1, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - vpsrlw(xmm6, xmm5, 8); - vpsllw(xmm5, 8); - vpsrlw(xmm5, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - vpaddd(xmm5, xmm2, xmm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 1); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(xmm5, 
xmm6, 8); - vpsrlw(xmm5, 8); - vpsrlw(xmm6, 8); - } - - vmovdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]); - vpsrlw(xmm0, xmm0, 1); - - vmovdqa(xmm2, ptr[&m_local.temp.trb]); - vmovdqa(xmm3, ptr[&m_local.temp.tga]); - - lerp16(xmm5, xmm2, xmm0, 0); - lerp16(xmm6, xmm3, xmm0, 0); - } - - pop(ebp); -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv) -{ - // xmm5 = minuv - // xmm6 = maxuv - // xmm0, xmm1, xmm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv, xmm5); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv, xmm0); - } - - vpminsw(uv, xmm6); - } - else - { - vpand(uv, xmm5); - - if(region) - { - vpor(uv, xmm6); - } - } - } - else - { - vmovdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv, xmm5); - - if(region) - { - vpor(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv, xmm5); - vpminsw(uv, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1) -{ - // xmm5 = minuv - // xmm6 = maxuv - // xmm0, xmm1, xmm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv0, xmm5); - vpmaxsw(uv1, xmm5); - } - else - { - vpxor(xmm0, xmm0); - vpmaxsw(uv0, xmm0); - vpmaxsw(uv1, xmm0); - } - - vpminsw(uv0, xmm6); - vpminsw(uv1, xmm6); - } - else - { - vpand(uv0, xmm5); - vpand(uv1, xmm5); - - if(region) - { - vpor(uv0, xmm6); - vpor(uv1, xmm6); - } - } - } - else - { - vmovdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // uv0 - - // 
GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv0, xmm5); - - if(region) - { - vpor(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv0, xmm5); - vpminsw(uv0, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv0, xmm1, xmm0); - - // uv1 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(xmm1, uv1, xmm5); - - if(region) - { - vpor(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv1, xmm5); - vpminsw(uv1, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv1, xmm1, xmm0); - } -} - -void GSDrawScanlineCodeGenerator::AlphaTFX() -{ - if(!m_sel.fb) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(xmm6, xmm4, 1); - - clamp16(xmm6, xmm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - vpsrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_HIGHLIGHT: - - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vmovdqa(xmm2, xmm4); - - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); - - vpsrlw(xmm4, 7); - - if(m_sel.tcc) - { - vpaddusb(xmm4, xmm6); - } - - mix16(xmm6, xmm4, xmm3); - - break; - - case TFX_HIGHLIGHT2: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm4, ptr[m_sel.iip ? 
&m_local.temp.ga : &m_local.c.ga]); - vmovdqa(xmm2, xmm4); - - vpsrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_NONE: - - // gat = iip ? ga.srl16(7) : ga; - - if(m_sel.iip) - { - vpsrlw(xmm6, 7); - } - - break; - } - - if(m_sel.aa1) - { - // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha - - // FIXME: bios config screen cubes - - if(!m_sel.abe) - { - // a = cov - - if(m_sel.edge) - { - vmovdqa(xmm0, ptr[&m_local.temp.cov]); - } - else - { - vpcmpeqd(xmm0, xmm0); - vpsllw(xmm0, 15); - vpsrlw(xmm0, 8); - } - - mix16(xmm6, xmm0, xmm1); - } - else - { - // a = a == 0x80 ? cov : a - - vpcmpeqd(xmm0, xmm0); - vpsllw(xmm0, 15); - vpsrlw(xmm0, 8); - - if(m_sel.edge) - { - vmovdqa(xmm1, ptr[&m_local.temp.cov]); - } - else - { - vmovdqa(xmm1, xmm0); - } - - vpcmpeqw(xmm0, xmm6); - vpsrld(xmm0, 16); - vpslld(xmm0, 16); - - vpblendvb(xmm6, xmm1, xmm0); - } - } -} - -void GSDrawScanlineCodeGenerator::ReadMask() -{ - if(m_sel.fwrite) - { - vmovdqa(xmm3, ptr[&m_local.gd->fm]); - } - - if(m_sel.zwrite) - { - vmovdqa(xmm4, ptr[&m_local.gd->zm]); - } -} - -void GSDrawScanlineCodeGenerator::TestAlpha() -{ - switch(m_sel.afail) - { - case AFAIL_FB_ONLY: - if(!m_sel.zwrite) return; - break; - - case AFAIL_ZB_ONLY: - if(!m_sel.fwrite) return; - break; - - case AFAIL_RGB_ONLY: - if(!m_sel.zwrite && m_sel.fpsm == 1) return; - break; - } - - switch(m_sel.atst) - { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - vpcmpeqd(xmm1, xmm1); - break; - - case ATST_ALWAYS: - return; - - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpgtd(xmm1, ptr[&m_local.gd->aref]); - break; - - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm0); - break; - - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(xmm0, xmm6, 16); - vmovdqa(xmm1, 
ptr[&m_local.gd->aref]); - vpcmpgtd(xmm1, xmm0); - break; - - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); - break; - } - - switch(m_sel.afail) - { - case AFAIL_KEEP: - // test |= t; - vpor(xmm7, xmm1); - alltrue(); - break; - - case AFAIL_FB_ONLY: - // zm |= t; - vpor(xmm4, xmm1); - break; - - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(xmm3, xmm1); - break; - - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(xmm4, xmm1); - // fm |= t & GSVector4i::xff000000(); - vpsrld(xmm1, 24); - vpslld(xmm1, 24); - vpor(xmm3, xmm1); - break; - } -} - -void GSDrawScanlineCodeGenerator::ColorTFX() -{ - if(!m_sel.fwrite) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector4i rb = iip ? rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).clamp8(); - - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - clamp16(xmm5, xmm1); - - break; - - case TFX_DECAL: - - break; - - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } - - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - - vmovdqa(xmm1, xmm6); - - modulate16(xmm6, xmm2, 1); - - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(xmm2, 7); - - vpaddw(xmm6, xmm2); - - clamp16(xmm6, xmm0); - - mix16(xmm6, xmm1, xmm0); - - // GSVector4i rb = iip ? rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - vpaddw(xmm5, xmm2); - - clamp16(xmm5, xmm0); - - break; - - case TFX_NONE: - - // rbt = iip ? 
rb.srl16(7) : rb; - - if(m_sel.iip) - { - vpsrlw(xmm5, 7); - } - - break; - } -} - -void GSDrawScanlineCodeGenerator::Fog() -{ - if(!m_sel.fwrite || !m_sel.fge) - { - return; - } - - // rb = m_local.gd->frb.lerp16<0>(rb, f); - // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - - vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]); - vmovdqa(xmm1, xmm6); - - vmovdqa(xmm2, ptr[&m_local.gd->frb]); - lerp16(xmm5, xmm2, xmm0, 0); - - vmovdqa(xmm2, ptr[&m_local.gd->fga]); - lerp16(xmm6, xmm2, xmm0, 0); - mix16(xmm6, xmm1, xmm0); -} - -void GSDrawScanlineCodeGenerator::ReadFrame() -{ - if(!m_sel.fb) - { - return; - } - - // int fa = fza_base.x + fza_offset->x; - - mov(ebx, ptr[esi]); - add(ebx, ptr[edi]); - - if(!m_sel.rfb) - { - return; - } - - ReadPixel(xmm2, ebx); -} - -void GSDrawScanlineCodeGenerator::TestDestAlpha() -{ - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) - { - return; - } - - // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - - if(m_sel.datm) - { - if(m_sel.fpsm == 2) - { - vpxor(xmm0, xmm0); - //vpsrld(xmm1, xmm2, 15); - vpslld(xmm1, xmm2, 16); - vpsrad(xmm1, 31); - vpcmpeqd(xmm1, xmm0); - } - else - { - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm2, xmm0); - vpsrad(xmm1, 31); - } - } - else - { - if(m_sel.fpsm == 2) - { - vpslld(xmm1, xmm2, 16); - vpsrad(xmm1, 31); - } - else - { - vpsrad(xmm1, xmm2, 31); - } - } - - vpor(xmm7, xmm1); - - alltrue(); -} - -void GSDrawScanlineCodeGenerator::WriteMask() -{ - if(m_sel.notest) - { - return; - } - - // fm |= test; - // zm |= test; - - if(m_sel.fwrite) - { - vpor(xmm3, xmm7); - } - - if(m_sel.zwrite) - { - vpor(xmm4, xmm7); - } - - // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); - - vpcmpeqd(xmm1, xmm1); - - if(m_sel.fwrite && m_sel.zwrite) - { - vpcmpeqd(xmm0, xmm1, xmm4); - vpcmpeqd(xmm1, xmm3); - vpackssdw(xmm1, xmm0); - } - else if(m_sel.fwrite) - { - vpcmpeqd(xmm1, xmm3); - vpackssdw(xmm1, xmm1); - } - else 
if(m_sel.zwrite) - { - vpcmpeqd(xmm1, xmm4); - vpackssdw(xmm1, xmm1); - } - - vpmovmskb(edx, xmm1); - - not(edx); -} - -void GSDrawScanlineCodeGenerator::WriteZBuf() -{ - if(!m_sel.zwrite) - { - return; - } - - vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); - - if(m_sel.ztest && m_sel.zpsm < 2) - { - // zs = zs.blend8(zd, zm); - - vpblendvb(xmm1, ptr[&m_local.temp.zd], xmm4); - } - - bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest; - - WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1); -} - -void GSDrawScanlineCodeGenerator::AlphaBlend() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.abe == 0 && m_sel.aa1 == 0) - { - return; - } - - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) - { - switch(m_sel.fpsm) - { - case 0: - case 1: - - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; - - vpsllw(xmm0, xmm2, 8); - vpsrlw(xmm0, 8); - vpsrlw(xmm1, xmm2, 8); - - break; - - case 2: - - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - - vpcmpeqd(xmm7, xmm7); - - vpsrld(xmm7, 27); // 0x0000001f - vpand(xmm0, xmm2, xmm7); - vpslld(xmm0, 3); - - vpslld(xmm7, 10); // 0x00007c00 - vpand(xmm4, xmm2, xmm7); - vpslld(xmm4, 9); - - vpor(xmm0, xmm4); - - vpsrld(xmm7, 5); // 0x000003e0 - vpand(xmm1, xmm2, xmm7); - vpsrld(xmm1, 2); - - vpsllw(xmm7, 10); // 0x00008000 - vpand(xmm4, xmm2, xmm7); - vpslld(xmm4, 8); - - vpor(xmm1, xmm4); - - break; - } - } - - // xmm5, xmm6 = src rb, ga - // xmm0, xmm1 = dst rb, ga - // xmm2, xmm3 = used - // xmm4, xmm7 = free - - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) - { - vmovdqa(xmm4, xmm5); - } - - if(m_sel.aba != m_sel.abb) - { - // rb = c[aba * 2 + 0]; - - switch(m_sel.aba) - { - case 0: break; - case 1: vmovdqa(xmm5, xmm0); break; - case 2: vpxor(xmm5, xmm5); break; - } - - // rb = rb.sub16(c[abb * 2 + 0]); - - 
switch(m_sel.abb) - { - case 0: vpsubw(xmm5, xmm4); break; - case 1: vpsubw(xmm5, xmm0); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - - switch(m_sel.abc) - { - case 0: - case 1: - vpshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(xmm7, 7); - break; - case 2: - vmovdqa(xmm7, ptr[&m_local.gd->afix]); - break; - } - - // rb = rb.modulate16<1>(a); - - modulate16(xmm5, xmm7, 1); - } - - // rb = rb.add16(c[abd * 2 + 0]); - - switch(m_sel.abd) - { - case 0: vpaddw(xmm5, xmm4); break; - case 1: vpaddw(xmm5, xmm0); break; - case 2: break; - } - } - else - { - // rb = c[abd * 2 + 0]; - - switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(xmm5, xmm0); break; - case 2: vpxor(xmm5, xmm5); break; - } - } - - if(m_sel.pabe) - { - // mask = (c[1] << 8).sra32(31); - - vpslld(xmm0, xmm6, 8); - vpsrad(xmm0, 31); - - // rb = c[0].blend8(rb, mask); - - vpblendvb(xmm5, xmm4, xmm5, xmm0); - } - - // xmm6 = src ga - // xmm1 = dst ga - // xmm5 = rb - // xmm7 = a - // xmm2, xmm3 = used - // xmm0, xmm4 = free - - vmovdqa(xmm4, xmm6); - - if(m_sel.aba != m_sel.abb) - { - // ga = c[aba * 2 + 1]; - - switch(m_sel.aba) - { - case 0: break; - case 1: vmovdqa(xmm6, xmm1); break; - case 2: vpxor(xmm6, xmm6); break; - } - - // ga = ga.sub16(c[abeb * 2 + 1]); - - switch(m_sel.abb) - { - case 0: vpsubw(xmm6, xmm4); break; - case 1: vpsubw(xmm6, xmm1); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // ga = ga.modulate16<1>(a); - - modulate16(xmm6, xmm7, 1); - } - - // ga = ga.add16(c[abd * 2 + 1]); - - switch(m_sel.abd) - { - case 0: vpaddw(xmm6, xmm4); break; - case 1: vpaddw(xmm6, xmm1); break; - case 2: break; - } - } - else - { - // ga = c[abd * 2 + 1]; - - switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(xmm6, xmm1); break; - case 2: vpxor(xmm6, xmm6); break; - } - 
} - - // xmm4 = src ga - // xmm5 = rb - // xmm6 = ga - // xmm2, xmm3 = used - // xmm0, xmm1, xmm7 = free - - if(m_sel.pabe) - { - vpsrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) - - // ga = c[1].blend8(ga, mask).mix16(c[1]); - - vpblendvb(xmm6, xmm4, xmm6, xmm0); - } - else - { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx - { - mix16(xmm6, xmm4, xmm7); - } - } -} - -void GSDrawScanlineCodeGenerator::WriteFrame() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(eax, ptr[esp + _top]); - and(eax, 3); - shl(eax, 5); - mov(ebp, ptr[&m_local.gd->dimx]); - vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); - vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); - } - - if(m_sel.colclamp == 0) - { - // c[0] &= 0x00ff00ff; - // c[1] &= 0x00ff00ff; - - vpcmpeqd(xmm7, xmm7); - vpsrlw(xmm7, 8); - vpand(xmm5, xmm7); - vpand(xmm6, xmm7); - } - - // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); - - vpunpckhwd(xmm7, xmm5, xmm6); - vpunpcklwd(xmm5, xmm6); - vpackuswb(xmm5, xmm7); - - if(m_sel.fba && m_sel.fpsm != 1) - { - // fs |= 0x80000000; - - vpcmpeqd(xmm7, xmm7); - vpslld(xmm7, 31); - vpor(xmm5, xmm7); - } - - if(m_sel.fpsm == 2) - { - // GSVector4i rb = fs & 0x00f800f8; - // GSVector4i ga = fs & 0x8000f800; - - mov(eax, 0x00f800f8); - vmovd(xmm6, eax); - vpshufd(xmm6, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - - mov(eax, 0x8000f800); - vmovd(xmm7, eax); - vpshufd(xmm7, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); - - vpand(xmm4, xmm5, xmm6); - vpand(xmm5, xmm7); - - // fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); - - vpsrld(xmm6, xmm4, 9); - vpsrld(xmm4, 3); - vpsrld(xmm7, xmm5, 16); - vpsrld(xmm5, 6); - - vpor(xmm5, xmm4); - vpor(xmm7, xmm6); - vpor(xmm5, xmm7); - } - - if(m_sel.rfb) - { - // fs = fs.blend(fd, fm); - - blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm - } - - bool fast = m_sel.rfb ? 
m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest; - - WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0); -} - -void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr) -{ - vmovq(dst, qword[addr * 2 + (size_t)m_local.gd->vm]); - vmovhps(dst, qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2]); -} - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz) -{ - if(m_sel.notest) - { - if(fast) - { - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); - } - else - { - WritePixel(src, addr, 0, psm); - WritePixel(src, addr, 1, psm); - WritePixel(src, addr, 2, psm); - WritePixel(src, addr, 3, psm); - } - } - else - { - if(fast) - { - // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); - // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); - - test(mask, 0x0f); - je("@f"); - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src); - L("@@"); - - test(mask, 0xf0); - je("@f"); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); - L("@@"); - - // vmaskmovps? 
- } - else - { - // if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>()); - // if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>()); - // if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>()); - // if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>()); - - test(mask, 0x03); - je("@f"); - WritePixel(src, addr, 0, psm); - L("@@"); - - test(mask, 0x0c); - je("@f"); - WritePixel(src, addr, 1, psm); - L("@@"); - - test(mask, 0x30); - je("@f"); - WritePixel(src, addr, 2, psm); - L("@@"); - - test(mask, 0xc0); - je("@f"); - WritePixel(src, addr, 3, psm); - L("@@"); - } - } -} - -static const int s_offsets[] = {0, 2, 8, 10}; - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm) -{ - Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - - switch(psm) - { - case 0: - if(i == 0) vmovd(dst, src); - else vpextrd(dst, src, i); - break; - case 1: - if(i == 0) vmovd(eax, src); - else vpextrd(eax, src, i); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(i == 0) vmovd(eax, src); - else vpextrw(eax, src, i * 2); - mov(dst, ax); - break; - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) -{ - // in - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // ebx = m_local.tex[0] (!m_sel.mmin) - // ebp = m_local.tex (m_sel.mmin) - // edx = m_local.clut (m_sel.tlu) - - // out - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - - ASSERT(pixels == 1 || pixels == 4); - - mip_offset *= sizeof(void*); - - const GSVector4i* lod_i = m_sel.lcm ? 
&m_local.gd->lod.i : &m_local.temp.lod.i; - - if(m_sel.mmin && !m_sel.lcm) - { - const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; - - if(pixels == 4) - { - vmovdqa(ptr[&m_local.temp.test], xmm7); - } - - for(uint8 j = 0; j < 4; j++) - { - mov(ebx, ptr[&lod_i->u32[j]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - for(int i = 0; i < pixels; i++) - { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } - - if(pixels == 4) - { - vmovdqa(xmm5, xmm7); - vmovdqa(xmm7, ptr[&m_local.temp.test]); - } - } - else - { - if(m_sel.mmin && m_sel.lcm) - { - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - } - - const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - const int t[] = {4, 1, 5, 2}; - - for(int i = 0; i < pixels; i++) - { - for(uint8 j = 0; j < 4; j++) - { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) -{ - ASSERT(i < 4); - - const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - - if(i == 0) vmovd(eax, addr); - else vpextrd(eax, addr, i); - - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); - - if(i == 0) vmovd(dst, src); - else vpinsrd(dst, src, i); -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx2.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx2.cpp deleted file mode 100644 index 7ee865c0ec..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.avx2.cpp +++ /dev/null @@ -1,2970 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE >= 0x501 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 16; -static const int _top = _args + 4; -static const int _v = _args + 8; - -void GSDrawScanlineCodeGenerator::Generate() -{ -//ret(8); - - push(ebx); - push(esi); - push(edi); - push(ebp); - - //db(0xcc); - - Init(); - - if(!m_sel.edge) - { - align(16); - } - -L("loop"); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ymm0 = z/zi - // ymm2 = s/u (tme) - // ymm3 = t/v (tme) - // ymm4 = q (tme) - // ymm5 = rb (!tme) - // ymm6 = ga (!tme) - // ymm7 = test - - bool tme = m_sel.tfx != TFX_NONE; - - TestZ(tme ? ymm5 : ymm2, tme ? 
ymm6 : ymm3); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - ymm0 - // ymm2 = s/u (tme) - // ymm3 = t/v (tme) - // ymm4 = q (tme) - // ymm5 = rb (!tme) - // ymm6 = ga (!tme) - // ymm7 = test - - if(m_sel.mmin) - { - SampleTextureLOD(); - } - else - { - SampleTexture(); - } - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - ymm2 - // - ymm3 - // - ymm4 - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - AlphaTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - ReadMask(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - TestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - ColorTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - Fog(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - ReadFrame(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = fd - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - TestDestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = fd - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - // ymm7 = test - - WriteMask(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // ebp = za - // ymm2 = fd - // ymm3 = fm - // ymm4 = zm - // ymm5 = rb - // ymm6 = ga - - WriteZBuf(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - 
// edi = fzbc - // - ebp - // ymm2 = fd - // ymm3 = fm - // - ymm4 - // ymm5 = rb - // ymm6 = ga - - AlphaBlend(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // ymm2 = fd - // ymm3 = fm - // ymm5 = rb - // ymm6 = ga - - WriteFrame(); - -L("step"); - - // if(steps <= 0) break; - - if(!m_sel.edge) - { - test(ecx, ecx); - - jle("exit", T_NEAR); - - Step(); - - jmp("loop", T_NEAR); - } - -L("exit"); - - pop(ebp); - pop(edi); - pop(esi); - pop(ebx); - - ret(8); -} - -void GSDrawScanlineCodeGenerator::Init() -{ - if(!m_sel.notest) - { - // int skip = left & 7; - - mov(ebx, edx); - and(edx, 7); - - // int steps = pixels + skip - 8; - - lea(ecx, ptr[ecx + edx - 8]); - - // left -= skip; - - sub(ebx, edx); - - // GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))]; - - mov(eax, ecx); - sar(eax, 31); - and(eax, ecx); - - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]); - vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]); - vpor(ymm7, ymm0); - - shl(edx, 5); - } - else - { - mov(ebx, edx); // left - xor(edx, edx); // skip - lea(ecx, ptr[ecx - 8]); // steps - } - - // GSVector2i* fza_base = &m_local.gd->fzbr[top]; - - mov(esi, ptr[esp + _top]); - lea(esi, ptr[esi * 8]); - add(esi, ptr[&m_local.gd->fzbr]); - - // GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2]; - - lea(edi, ptr[ebx * 2]); - add(edi, ptr[&m_local.gd->fzbc]); - - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) - { - // edx = &m_local.d[skip] - - lea(edx, ptr[edx * 8 + (size_t)m_local.d]); - - // ebx = &v - - mov(ebx, ptr[esp + _v]); - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) - { - vbroadcastf128(ymm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - - if(m_sel.fwrite && m_sel.fge) - { - // f = GSVector8i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); - - vcvttps2dq(ymm1, ymm0); - vpshufhw(ymm1, ymm1, 
_MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(ymm1, ymm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpaddw(ymm1, ptr[edx + offsetof(GSScanlineLocalData::skip, f)]); - - vmovdqa(ptr[&m_local.temp.f], ymm1); - } - - if(m_sel.zb) - { - // z = vp.zzzz() + m_local.d[skip].z; - - vshufps(ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - vmovaps(ptr[&m_local.temp.z], ymm0); - vmovaps(ymm2, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]); - vmovaps(ptr[&m_local.temp.zo], ymm2); - vaddps(ymm0, ymm2); - } - } - } - else - { - if(m_sel.ztest) - { - vpbroadcastd(ymm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.edge || m_sel.tfx != TFX_NONE) - { - vbroadcastf128(ymm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t - } - - if(m_sel.edge) - { - // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); - - vpshufhw(ymm3, ymm4, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(ymm3, ymm3, _MM_SHUFFLE(3, 3, 3, 3)); - vpsrlw(ymm3, 9); - - vmovdqa(ptr[&m_local.temp.cov], ymm3); - } - - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i vti(vt); - - vcvttps2dq(ymm6, ymm4); - - // s = vti.xxxx() + m_local.d[skip].s; - // t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t; - - vpshufd(ymm2, ymm6, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm3, ymm6, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddd(ymm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - vpaddd(ymm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - } - else - { - if(m_sel.ltf) - { - vpshuflw(ymm6, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm6, ymm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm6, 12); - vmovdqa(ptr[&m_local.temp.vf], ymm6); - } - } - - vmovdqa(ptr[&m_local.temp.s], ymm2); - vmovdqa(ptr[&m_local.temp.t], ymm3); - } - else - { - // s = vt.xxxx() + m_local.d[skip].s; - // t = vt.yyyy() + m_local.d[skip].t; - // q = vt.zzzz() + m_local.d[skip].q; - - vshufps(ymm2, ymm4, ymm4, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(ymm3, ymm4, ymm4, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(ymm4, 
ymm4, ymm4, _MM_SHUFFLE(2, 2, 2, 2)); - - vaddps(ymm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - vaddps(ymm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - vaddps(ymm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]); - - vmovaps(ptr[&m_local.temp.s], ymm2); - vmovaps(ptr[&m_local.temp.t], ymm3); - vmovaps(ptr[&m_local.temp.q], ymm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i vc = GSVector4i(v.c); - - vbroadcastf128(ymm6, ptr[ebx + offsetof(GSVertexSW, c)]); // v.c - vcvttps2dq(ymm6, ymm6); - - // vc = vc.upl16(vc.zwxy()); - - vpshufd(ymm5, ymm6, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(ymm6, ymm5); - - // rb = vc.xxxx().add16(m_local.d[skip].rb); - // ga = vc.zzzz().add16(m_local.d[skip].ga); - - vpshufd(ymm5, ymm6, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm6, ymm6, _MM_SHUFFLE(2, 2, 2, 2)); - - vpaddw(ymm5, ptr[edx + offsetof(GSScanlineLocalData::skip, rb)]); - vpaddw(ymm6, ptr[edx + offsetof(GSScanlineLocalData::skip, ga)]); - - vmovdqa(ptr[&m_local.temp.rb], ymm5); - vmovdqa(ptr[&m_local.temp.ga], ymm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - vmovdqa(ymm5, ptr[&m_local.c.rb]); - vmovdqa(ymm6, ptr[&m_local.c.ga]); - } - } - } - } -} - -void GSDrawScanlineCodeGenerator::Step() -{ - // steps -= 8; - - sub(ecx, 8); - - // fza_offset += 2; - - add(edi, 16); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // zo += GSVector8::broadcast32(&m_local.d8.p.z); - - if(m_sel.zb) - { - vbroadcastss(ymm0, ptr[&m_local.d8.p.z]); - vaddps(ymm0, ptr[&m_local.temp.zo]); - vmovaps(ptr[&m_local.temp.zo], ymm0); - vaddps(ymm0, ptr[&m_local.temp.z]); - } - - // f = f.add16(GSVector8i::broadcast16(&m_local.d8.p.f)); - - if(m_sel.fwrite && m_sel.fge) - { - vpbroadcastw(ymm1, ptr[&m_local.d8.p.f]); - vpaddw(ymm1, ptr[&m_local.temp.f]); - vmovdqa(ptr[&m_local.temp.f], ymm1); - } - } - else - { - if(m_sel.ztest) - { - vpbroadcastd(ymm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.tfx != TFX_NONE) - 
{ - if(m_sel.fst) - { - // GSVector8i stq = GSVector8i::cast(GSVector8(m_local.d8.stq)); - - vbroadcasti128(ymm4, ptr[&m_local.d8.stq]); - - // s = GSVector8::cast(GSVector8i::cast(s) + stq.xxxx()); - - vpshufd(ymm2, ymm4, _MM_SHUFFLE(0, 0, 0, 0)); - vpaddd(ymm2, ptr[&m_local.temp.s]); - vmovdqa(ptr[&m_local.temp.s], ymm2); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - // t = GSVector8::cast(GSVector8i::cast(t) + stq.yyyy()); - - vpshufd(ymm3, ymm4, _MM_SHUFFLE(1, 1, 1, 1)); - vpaddd(ymm3, ptr[&m_local.temp.t]); - vmovdqa(ptr[&m_local.temp.t], ymm3); - } - else - { - vmovdqa(ymm3, ptr[&m_local.temp.t]); - } - } - else - { - // GSVector8 stq(m_local.d8.stq); - - // s += stq.xxxx(); - // t += stq.yyyy(); - // q += stq.zzzz(); - - vbroadcastf128(ymm4, ptr[&m_local.d8.stq]); - - vshufps(ymm2, ymm4, ymm4, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(ymm3, ymm4, ymm4, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(ymm4, ymm4, ymm4, _MM_SHUFFLE(2, 2, 2, 2)); - - vaddps(ymm2, ptr[&m_local.temp.s]); - vaddps(ymm3, ptr[&m_local.temp.t]); - vaddps(ymm4, ptr[&m_local.temp.q]); - - vmovaps(ptr[&m_local.temp.s], ymm2); - vmovaps(ptr[&m_local.temp.t], ymm3); - vmovaps(ptr[&m_local.temp.q], ymm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector8i c = GSVector8i::broadcast64(&m_local.d8.c); - - vpbroadcastq(ymm7, ptr[&m_local.d8.c]); - - // rb = rb.add16(c.xxxx()).max_i16(GSVector8i::zero()); - // ga = ga.add16(c.yyyy()).max_i16(GSVector8i::zero()); - - vpshufd(ymm5, ymm7, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm6, ymm7, _MM_SHUFFLE(1, 1, 1, 1)); - - vpaddw(ymm5, ptr[&m_local.temp.rb]); - vpaddw(ymm6, ptr[&m_local.temp.ga]); - - // FIXME: color may underflow and roll over at the end of the line, if decreasing - - vpxor(ymm7, ymm7); - vpmaxsw(ymm5, ymm7); - vpmaxsw(ymm6, ymm7); - - vmovdqa(ptr[&m_local.temp.rb], ymm5); - vmovdqa(ptr[&m_local.temp.ga], ymm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - vmovdqa(ymm5, ptr[&m_local.c.rb]); - 
vmovdqa(ymm6, ptr[&m_local.c.ga]); - } - } - } - } - - if(!m_sel.notest) - { - // test = m_test[15 + (steps & (steps >> 31))]; - - mov(edx, ecx); - sar(edx, 31); - and(edx, ecx); - - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]); - } -} - -void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) -{ - if(!m_sel.zb) - { - return; - } - - // int za = fza_base.y + fza_offset->y; - - mov(ebp, ptr[esi + 4]); - add(ebp, ptr[edi + 4]); - - // GSVector8i zs = zi; - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.zoverflow) - { - // zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001()); - - vbroadcastss(temp1, ptr[&GSVector8::m_half]); - vmulps(temp1, ymm0); - vcvttps2dq(temp1, temp1); - vpslld(temp1, 1); - - vcvttps2dq(ymm0, ymm0); - vpcmpeqd(temp2, temp2); - vpsrld(temp2, 31); - vpand(ymm0, temp2); - - vpor(ymm0, temp1); - } - else - { - // zs = GSVector8i(z); - - vcvttps2dq(ymm0, ymm0); - } - - if(m_sel.zwrite) - { - vmovdqa(ptr[&m_local.temp.zs], ymm0); - } - } - - if(m_sel.ztest) - { - ReadPixel(ymm1, temp1, ebp); - - if(m_sel.zwrite && m_sel.zpsm < 2) - { - vmovdqa(ptr[&m_local.temp.zd], ymm1); - } - - // zd &= 0xffffffff >> m_sel.zpsm * 8; - - if(m_sel.zpsm) - { - vpslld(ymm1, (uint8)(m_sel.zpsm * 8)); - vpsrld(ymm1, (uint8)(m_sel.zpsm * 8)); - } - - if(m_sel.zoverflow || m_sel.zpsm == 0) - { - // GSVector8i o = GSVector8i::x80000000(); - - vpcmpeqd(temp1, temp1); - vpslld(temp1, 31); - - // GSVector8i zso = zs - o; - // GSVector8i zdo = zd - o; - - vpsubd(ymm0, temp1); - vpsubd(ymm1, temp1); - } - - switch(m_sel.ztst) - { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(ymm1, ymm0); - vpor(ymm7, ymm1); - break; - - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(ymm0, ymm1); - vpcmpeqd(temp1, temp1); - vpxor(ymm0, temp1); - vpor(ymm7, ymm0); - break; - } - - alltrue(); - } -} - 
-void GSDrawScanlineCodeGenerator::SampleTexture() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - mov(ebx, ptr[&m_local.gd->tex[0]]); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - // ebx = tex - // edx = clut - - if(!m_sel.fst) - { - vrcpps(ymm0, ymm4); - - vmulps(ymm2, ymm0); - vmulps(ymm3, ymm0); - - vcvttps2dq(ymm2, ymm2); - vcvttps2dq(ymm3, ymm3); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm4, eax); - vpbroadcastd(ymm4, xmm4); - - vpsubd(ymm2, ymm4); - vpsubd(ymm3, ymm4); - } - } - - // ymm2 = u - // ymm3 = v - - if(m_sel.ltf) - { - // GSVector8i uf = u.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.uf], ymm0); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector8i vf = v.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.vf], ymm0); - } - } - - // GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(ymm2, 16); - vpsrad(ymm3, 16); - vpackssdw(ymm2, ymm3); - - if(m_sel.ltf) - { - // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); - - vpcmpeqd(ymm1, ymm1); - vpsrlw(ymm1, 15); - vpaddw(ymm3, ymm2, ymm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - Wrap(ymm2, ymm3); - } - else - { - // uv0 = Wrap(uv0); - - Wrap(ymm2); - } - - // ymm2 = uv0 - // ymm3 = uv1 (ltf) - // ymm0, ymm1, ymm4, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i y0 = uv0.uph16() << tw; - // GSVector8i x0 = uv0.upl16(); - - vpxor(ymm0, ymm0); - - vpunpcklwd(ymm4, ymm2, ymm0); - vpunpckhwd(ymm2, ymm2, ymm0); - vpslld(ymm2, (uint8)(m_sel.tw + 3)); - - // ymm0 = 0 - // ymm2 = y0 - // ymm3 = uv1 (ltf) - // ymm4 = x0 - // ymm1, ymm5, ymm6 = free - // ymm7 = used - - if(m_sel.ltf) - { - // GSVector8i y1 = uv1.uph16() << tw; - // GSVector8i x1 = uv1.upl16(); - - 
vpunpcklwd(ymm6, ymm3, ymm0); - vpunpckhwd(ymm3, ymm3, ymm0); - vpslld(ymm3, (uint8)(m_sel.tw + 3)); - - // ymm2 = y0 - // ymm3 = y1 - // ymm4 = x0 - // ymm6 = x1 - // ymm0, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i addr00 = y0 + x0; - // GSVector8i addr01 = y0 + x1; - // GSVector8i addr10 = y1 + x0; - // GSVector8i addr11 = y1 + x1; - - vpaddd(ymm5, ymm2, ymm4); - vpaddd(ymm2, ymm2, ymm6); - vpaddd(ymm0, ymm3, ymm4); - vpaddd(ymm3, ymm3, ymm6); - - // ymm5 = addr00 - // ymm2 = addr01 - // ymm0 = addr10 - // ymm3 = addr11 - // ymm1, ymm4, ymm6 = free - // ymm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // ymm6 = c00 - // ymm4 = c01 - // ymm1 = c10 - // ymm5 = c11 - // ymm0, ymm2, ymm3 = free - // ymm7 = used - - vmovdqa(ymm0, ptr[&m_local.temp.uf]); - - // GSVector8i rb00 = c00 & mask; - // GSVector8i ga00 = (c00 >> 8) & mask; - - vpsllw(ymm2, ymm6, 8); - vpsrlw(ymm2, 8); - vpsrlw(ymm6, 8); - - // GSVector8i rb01 = c01 & mask; - // GSVector8i ga01 = (c01 >> 8) & mask; - - vpsllw(ymm3, ymm4, 8); - vpsrlw(ymm3, 8); - vpsrlw(ymm4, 8); - - // ymm0 = uf - // ymm2 = rb00 - // ymm3 = rb01 - // ymm6 = ga00 - // ymm4 = ga01 - // ymm1 = c10 - // ymm5 = c11 - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(ymm3, ymm2, ymm0); - lerp16_4(ymm4, ymm6, ymm0); - - // ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = c10 - // ymm5 = c11 - // ymm2, ymm6 = free - // ymm7 = used - - // GSVector8i rb10 = c10 & mask; - // GSVector8i ga10 = (c10 >> 8) & mask; - - vpsrlw(ymm2, ymm1, 8); - vpsllw(ymm1, 8); - vpsrlw(ymm1, 8); - - // GSVector8i rb11 = c11 & mask; - // GSVector8i ga11 = (c11 >> 8) & mask; - - vpsrlw(ymm6, ymm5, 8); - vpsllw(ymm5, 8); - vpsrlw(ymm5, 8); - - // 
ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = rb10 - // ymm5 = rb11 - // ymm2 = ga10 - // ymm6 = ga11 - // ymm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(ymm5, ymm1, ymm0); - lerp16_4(ymm6, ymm2, ymm0); - - // ymm3 = rb00 - // ymm4 = ga00 - // ymm5 = rb10 - // ymm6 = ga10 - // ymm0, ymm1, ymm2 = free - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(ymm0, ptr[&m_local.temp.vf]); - - lerp16_4(ymm5, ymm3, ymm0); - lerp16_4(ymm6, ymm4, ymm0); - } - else - { - // GSVector8i addr00 = y0 + x0; - - vpaddd(ymm5, ymm2, ymm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector8i mask = GSVector8i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(ymm5, ymm6, 8); - vpsrlw(ymm5, 8); - vpsrlw(ymm6, 8); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) -{ - // ymm0, ymm1, ymm4, ymm5, ymm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); - vpmaxsw(uv, ymm0); - } - else - { - vpxor(ymm0, ymm0); - vpmaxsw(uv, ymm0); - } - - vbroadcasti128(ymm0, ptr[&m_local.gd->t.max]); - vpminsw(uv, ymm0); - } - else - { - vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); - vpand(uv, ymm0); - - if(region) - { - vbroadcasti128(ymm0, ptr[&m_local.gd->t.max]); - vpor(uv, ymm0); - } - } - } - else - { - vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); - vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); - vbroadcasti128(ymm0, ptr[&m_local.gd->t.mask]); - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(ymm1, uv, ymm4); - - if(region) - { - vpor(ymm1, ymm5); - } - - // GSVector8i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - 
vpmaxsw(uv, ymm4); - vpminsw(uv, ymm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv, ymm1, ymm0); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) -{ - // ymm0, ymm1, ymm4, ymm5, ymm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); - vpmaxsw(uv0, ymm4); - vpmaxsw(uv1, ymm4); - } - else - { - vpxor(ymm0, ymm0); - vpmaxsw(uv0, ymm0); - vpmaxsw(uv1, ymm0); - } - - vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); - vpminsw(uv0, ymm5); - vpminsw(uv1, ymm5); - } - else - { - vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); - vpand(uv0, ymm4); - vpand(uv1, ymm4); - - if(region) - { - vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); - vpor(uv0, ymm5); - vpor(uv1, ymm5); - } - } - } - else - { - vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); - vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); - vbroadcasti128(ymm0, ptr[&m_local.gd->t.mask]); - - // uv0 - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(ymm1, uv0, ymm4); - - if(region) - { - vpor(ymm1, ymm5); - } - - // GSVector8i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv0, ymm4); - vpminsw(uv0, ymm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv0, ymm1, ymm0); - - // uv1 - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(ymm1, uv1, ymm4); - - if(region) - { - vpor(ymm1, ymm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv1, ymm4); - vpminsw(uv1, ymm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv1, ymm1, ymm0); - } -} - -void GSDrawScanlineCodeGenerator::SampleTextureLOD() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - push(ebp); - - mov(ebp, 
(size_t)m_local.gd->tex); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - if(!m_sel.fst) - { - vrcpps(ymm0, ymm4); - - vmulps(ymm2, ymm0); - vmulps(ymm3, ymm0); - - vcvttps2dq(ymm2, ymm2); - vcvttps2dq(ymm3, ymm3); - } - - // ymm2 = u - // ymm3 = v - // ymm4 = q - // ymm0 = ymm1 = ymm5 = ymm6 = free - - // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - - if(!m_sel.lcm) - { - // lod = -log2(Q) * (1 << L) + K - - vpcmpeqd(ymm1, ymm1); - vpsrld(ymm1, ymm1, 25); - vpslld(ymm0, ymm4, 1); - vpsrld(ymm0, ymm0, 24); - vpsubd(ymm0, ymm1); - vcvtdq2ps(ymm0, ymm0); - - // ymm0 = (float)(exp(q) - 127) - - vpslld(ymm4, ymm4, 9); - vpsrld(ymm4, ymm4, 9); - vorps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - - // ymm4 = mant(q) | 1.0f - - if(m_cpu.has(util::Cpu::tFMA)) - { - vmovaps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * ymm4 + c1 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * ymm4 + c1) * ymm4 + c2 - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // ymm4 - 1.0f - vfmadd213ps(ymm4, ymm5, ymm0); // ((c0 * ymm4 + c1) * ymm4 + c2) * (ymm4 - 1.0f) + ymm0 - } - else - { - vmulps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); - vmulps(ymm5, ymm4); - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); - vmulps(ymm4, ymm5); - vaddps(ymm4, ymm0); - } - - // ymm4 = log2(Q) = ((((c0 * ymm4) + c1) * ymm4) + c2) * (ymm4 - 1.0f) + ymm0 - - if(m_cpu.has(util::Cpu::tFMA)) - { - vmovaps(ymm5, ptr[&m_local.gd->l]); - vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); - } - else - { - vmulps(ymm4, ptr[&m_local.gd->l]); - vaddps(ymm4, 
ptr[&m_local.gd->k]); - } - - // ymm4 = (-log2(Q) * (1 << L) + K) * 0x10000 - - vxorps(ymm0, ymm0); - vminps(ymm4, ptr[&m_local.gd->mxl]); - vmaxps(ymm4, ymm0); - vcvtps2dq(ymm4, ymm4); - - if(m_sel.mmin == 1) // round-off mode - { - mov(eax, 0x8000); - vmovd(xmm0, eax); - vpbroadcastd(ymm0, xmm0); - vpaddd(ymm4, ymm0); - } - - vpsrld(ymm0, ymm4, 16); - - vmovdqa(ptr[&m_local.temp.lod.i], ymm0); -/* -vpslld(ymm5, ymm0, 6); -vpslld(ymm6, ymm4, 16); -vpsrld(ymm6, ymm6, 24); -return; -*/ - if(m_sel.mmin == 2) // trilinear mode - { - vpshuflw(ymm1, ymm4, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm1, ymm1, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.temp.lod.f], ymm1); - } - - // shift u/v/minmax by (int)lod - - vpsravd(ymm2, ymm2, ymm0); - vpsravd(ymm3, ymm3, ymm0); - - vmovdqa(ptr[&m_local.temp.uv[0]], ymm2); - vmovdqa(ptr[&m_local.temp.uv[1]], ymm3); - - // m_local.gd->t.minmax => m_local.temp.uv_minmax[0/1] - - vpxor(ymm1, ymm1); - - vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); - vpunpcklwd(ymm5, ymm4, ymm1); // minu - vpunpckhwd(ymm6, ymm4, ymm1); // minv - vpsrlvd(ymm5, ymm5, ymm0); - vpsrlvd(ymm6, ymm6, ymm0); - vpackusdw(ymm5, ymm6); - - vbroadcasti128(ymm4, ptr[&m_local.gd->t.max]); - vpunpcklwd(ymm6, ymm4, ymm1); // maxu - vpunpckhwd(ymm4, ymm4, ymm1); // maxv - vpsrlvd(ymm6, ymm6, ymm0); - vpsrlvd(ymm4, ymm4, ymm0); - vpackusdw(ymm6, ymm4); - - vmovdqa(ptr[&m_local.temp.uv_minmax[0]], ymm5); - vmovdqa(ptr[&m_local.temp.uv_minmax[1]], ymm6); - } - else - { - // lod = K - - vmovd(xmm0, ptr[&m_local.gd->lod.i.u32[0]]); - - vpsrad(ymm2, xmm0); - vpsrad(ymm3, xmm0); - - vmovdqa(ptr[&m_local.temp.uv[0]], ymm2); - vmovdqa(ptr[&m_local.temp.uv[1]], ymm3); - - vmovdqa(ymm5, ptr[&m_local.temp.uv_minmax[0]]); - vmovdqa(ymm6, ptr[&m_local.temp.uv_minmax[1]]); - } - - // ymm2 = m_local.temp.uv[0] = u (level m) - // ymm3 = m_local.temp.uv[1] = v (level m) - // ymm5 = minuv - // ymm6 = maxuv - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); 
- vmovd(xmm4, eax); - vpbroadcastd(ymm4, xmm4); - - vpsubd(ymm2, ymm4); - vpsubd(ymm3, ymm4); - - // GSVector8i uf = u.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.uf], ymm0); - - // GSVector8i vf = v.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.vf], ymm0); - } - - // GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(ymm2, 16); - vpsrad(ymm3, 16); - vpackssdw(ymm2, ymm3); - - if(m_sel.ltf) - { - // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); - - vpcmpeqd(ymm1, ymm1); - vpsrlw(ymm1, 15); - vpaddw(ymm3, ymm2, ymm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(ymm2, ymm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(ymm2); - } - - // ymm2 = uv0 - // ymm3 = uv1 (ltf) - // ymm0, ymm1, ymm4, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i x0 = uv0.upl16(); - // GSVector8i y0 = uv0.uph16() << tw; - - vpxor(ymm0, ymm0); - - vpunpcklwd(ymm4, ymm2, ymm0); - vpunpckhwd(ymm2, ymm2, ymm0); - vpslld(ymm2, (uint8)(m_sel.tw + 3)); - - // ymm0 = 0 - // ymm2 = y0 - // ymm3 = uv1 (ltf) - // ymm4 = x0 - // ymm1, ymm5, ymm6 = free - // ymm7 = used - - if(m_sel.ltf) - { - // GSVector8i x1 = uv1.upl16(); - // GSVector8i y1 = uv1.uph16() << tw; - - vpunpcklwd(ymm6, ymm3, ymm0); - vpunpckhwd(ymm3, ymm3, ymm0); - vpslld(ymm3, (uint8)(m_sel.tw + 3)); - - // ymm2 = y0 - // ymm3 = y1 - // ymm4 = x0 - // ymm6 = x1 - // ymm0, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i addr00 = y0 + x0; - // GSVector8i addr01 = y0 + x1; - // GSVector8i addr10 = y1 + x0; - // GSVector8i addr11 = y1 + x1; - - vpaddd(ymm5, ymm2, ymm4); - vpaddd(ymm2, ymm2, ymm6); - vpaddd(ymm0, ymm3, ymm4); - vpaddd(ymm3, ymm3, ymm6); - - // ymm5 = addr00 - // ymm2 = addr01 - // ymm0 = addr10 - // ymm3 = addr11 - // ymm1, ymm4, ymm6 = free - // 
ymm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // ymm6 = c00 - // ymm4 = c01 - // ymm1 = c10 - // ymm5 = c11 - // ymm0, ymm2, ymm3 = free - // ymm7 = used - - vmovdqa(ymm0, ptr[&m_local.temp.uf]); - - // GSVector8i rb00 = c00 & mask; - // GSVector8i ga00 = (c00 >> 8) & mask; - - vpsllw(ymm2, ymm6, 8); - vpsrlw(ymm2, 8); - vpsrlw(ymm6, 8); - - // GSVector8i rb01 = c01 & mask; - // GSVector8i ga01 = (c01 >> 8) & mask; - - vpsllw(ymm3, ymm4, 8); - vpsrlw(ymm3, 8); - vpsrlw(ymm4, 8); - - // ymm0 = uf - // ymm2 = rb00 - // ymm3 = rb01 - // ymm6 = ga00 - // ymm4 = ga01 - // ymm1 = c10 - // ymm5 = c11 - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(ymm3, ymm2, ymm0); - lerp16_4(ymm4, ymm6, ymm0); - - // ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = c10 - // ymm5 = c11 - // ymm2, ymm6 = free - // ymm7 = used - - // GSVector8i rb10 = c10 & mask; - // GSVector8i ga10 = (c10 >> 8) & mask; - - vpsrlw(ymm2, ymm1, 8); - vpsllw(ymm1, 8); - vpsrlw(ymm1, 8); - - // GSVector8i rb11 = c11 & mask; - // GSVector8i ga11 = (c11 >> 8) & mask; - - vpsrlw(ymm6, ymm5, 8); - vpsllw(ymm5, 8); - vpsrlw(ymm5, 8); - - // ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = rb10 - // ymm5 = rb11 - // ymm2 = ga10 - // ymm6 = ga11 - // ymm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(ymm5, ymm1, ymm0); - lerp16_4(ymm6, ymm2, ymm0); - - // ymm3 = rb00 - // ymm4 = ga00 - // ymm5 = rb10 - // ymm6 = ga10 - // ymm0, ymm1, ymm2 = free - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(ymm0, ptr[&m_local.temp.vf]); - - lerp16_4(ymm5, ymm3, ymm0); - lerp16_4(ymm6, ymm4, ymm0); - } - 
else - { - // GSVector8i addr00 = y0 + x0; - - vpaddd(ymm5, ymm2, ymm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector8i mask = GSVector8i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(ymm5, ymm6, 8); - vpsrlw(ymm5, 8); - vpsrlw(ymm6, 8); - } - - if(m_sel.mmin != 1) // !round-off mode - { - vmovdqa(ptr[&m_local.temp.trb], ymm5); - vmovdqa(ptr[&m_local.temp.tga], ymm6); - - vmovdqa(ymm2, ptr[&m_local.temp.uv[0]]); - vmovdqa(ymm3, ptr[&m_local.temp.uv[1]]); - - vpsrad(ymm2, 1); - vpsrad(ymm3, 1); - - vmovdqa(ymm5, ptr[&m_local.temp.uv_minmax[0]]); - vmovdqa(ymm6, ptr[&m_local.temp.uv_minmax[1]]); - - vpsrlw(ymm5, 1); - vpsrlw(ymm6, 1); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - vmovd(xmm4, eax); - vpbroadcastd(ymm4, xmm4); - - vpsubd(ymm2, ymm4); - vpsubd(ymm3, ymm4); - - // GSVector8i uf = u.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.uf], ymm0); - - // GSVector8i vf = v.xxzzlh().srl16(1); - - vpshuflw(ymm0, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(ymm0, 12); - vmovdqa(ptr[&m_local.temp.vf], ymm0); - } - - // GSVector8i uv0 = u.sra32(16).ps32(v.sra32(16)); - - vpsrad(ymm2, 16); - vpsrad(ymm3, 16); - vpackssdw(ymm2, ymm3); - - if(m_sel.ltf) - { - // GSVector8i uv1 = uv0.add16(GSVector4i::x0001()); - - vpcmpeqd(ymm1, ymm1); - vpsrlw(ymm1, 15); - vpaddw(ymm3, ymm2, ymm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(ymm2, ymm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(ymm2); - } - - // ymm2 = uv0 - // ymm3 = uv1 (ltf) - // ymm0, ymm1, ymm4, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i x0 = uv0.upl16(); - // GSVector8i y0 = uv0.uph16() << tw; - - vpxor(ymm0, ymm0); - - vpunpcklwd(ymm4, ymm2, ymm0); - vpunpckhwd(ymm2, ymm2, ymm0); - vpslld(ymm2, 
(uint8)(m_sel.tw + 3)); - - // ymm0 = 0 - // ymm2 = y0 - // ymm3 = uv1 (ltf) - // ymm4 = x0 - // ymm1, ymm5, ymm6 = free - // ymm7 = used - - if(m_sel.ltf) - { - // GSVector8i x1 = uv1.upl16(); - // GSVector8i y1 = uv1.uph16() << tw; - - vpunpcklwd(ymm6, ymm3, ymm0); - vpunpckhwd(ymm3, ymm3, ymm0); - vpslld(ymm3, (uint8)(m_sel.tw + 3)); - - // ymm2 = y0 - // ymm3 = y1 - // ymm4 = x0 - // ymm6 = x1 - // ymm0, ymm5, ymm6 = free - // ymm7 = used - - // GSVector8i addr00 = y0 + x0; - // GSVector8i addr01 = y0 + x1; - // GSVector8i addr10 = y1 + x0; - // GSVector8i addr11 = y1 + x1; - - vpaddd(ymm5, ymm2, ymm4); - vpaddd(ymm2, ymm2, ymm6); - vpaddd(ymm0, ymm3, ymm4); - vpaddd(ymm3, ymm3, ymm6); - - // ymm5 = addr00 - // ymm2 = addr01 - // ymm0 = addr10 - // ymm3 = addr11 - // ymm1, ymm4, ymm6 = free - // ymm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 1); - - // ymm6 = c00 - // ymm4 = c01 - // ymm1 = c10 - // ymm5 = c11 - // ymm0, ymm2, ymm3 = free - // ymm7 = used - - vmovdqa(ymm0, ptr[&m_local.temp.uf]); - - // GSVector8i rb00 = c00 & mask; - // GSVector8i ga00 = (c00 >> 8) & mask; - - vpsllw(ymm2, ymm6, 8); - vpsrlw(ymm2, 8); - vpsrlw(ymm6, 8); - - // GSVector8i rb01 = c01 & mask; - // GSVector8i ga01 = (c01 >> 8) & mask; - - vpsllw(ymm3, ymm4, 8); - vpsrlw(ymm3, 8); - vpsrlw(ymm4, 8); - - // ymm0 = uf - // ymm2 = rb00 - // ymm3 = rb01 - // ymm6 = ga00 - // ymm4 = ga01 - // ymm1 = c10 - // ymm5 = c11 - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb01, uf); - // ga00 = ga00.lerp16_4(ga01, uf); - - lerp16_4(ymm3, ymm2, ymm0); - lerp16_4(ymm4, ymm6, ymm0); - - // ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = c10 - // ymm5 = c11 - // ymm2, ymm6 = free - // ymm7 = used - - // GSVector8i rb10 = c10 & mask; - // GSVector8i 
ga10 = (c10 >> 8) & mask; - - vpsrlw(ymm2, ymm1, 8); - vpsllw(ymm1, 8); - vpsrlw(ymm1, 8); - - // GSVector8i rb11 = c11 & mask; - // GSVector8i ga11 = (c11 >> 8) & mask; - - vpsrlw(ymm6, ymm5, 8); - vpsllw(ymm5, 8); - vpsrlw(ymm5, 8); - - // ymm0 = uf - // ymm3 = rb00 - // ymm4 = ga00 - // ymm1 = rb10 - // ymm5 = rb11 - // ymm2 = ga10 - // ymm6 = ga11 - // ymm7 = used - - // rb10 = rb10.lerp16_4(rb11, uf); - // ga10 = ga10.lerp16_4(ga11, uf); - - lerp16_4(ymm5, ymm1, ymm0); - lerp16_4(ymm6, ymm2, ymm0); - - // ymm3 = rb00 - // ymm4 = ga00 - // ymm5 = rb10 - // ymm6 = ga10 - // ymm0, ymm1, ymm2 = free - // ymm7 = used - - // rb00 = rb00.lerp16_4(rb10, vf); - // ga00 = ga00.lerp16_4(ga10, vf); - - vmovdqa(ymm0, ptr[&m_local.temp.vf]); - - lerp16_4(ymm5, ymm3, ymm0); - lerp16_4(ymm6, ymm4, ymm0); - } - else - { - // GSVector8i addr00 = y0 + x0; - - vpaddd(ymm5, ymm2, ymm4); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 1); - - // GSVector8i mask = GSVector8i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - vpsllw(ymm5, ymm6, 8); - vpsrlw(ymm5, 8); - vpsrlw(ymm6, 8); - } - - vmovdqa(ymm0, ptr[m_sel.lcm ? 
&m_local.gd->lod.f : &m_local.temp.lod.f]); - vpsrlw(ymm0, ymm0, 1); - - vmovdqa(ymm2, ptr[&m_local.temp.trb]); - vmovdqa(ymm3, ptr[&m_local.temp.tga]); - - lerp16(ymm5, ymm2, ymm0, 0); - lerp16(ymm6, ymm3, ymm0, 0); - } - - pop(ebp); -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) -{ - // ymm5 = minuv - // ymm6 = maxuv - // ymm0, ymm1, ymm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv, ymm5); - } - else - { - vpxor(ymm0, ymm0); - vpmaxsw(uv, ymm0); - } - - vpminsw(uv, ymm6); - } - else - { - vpand(uv, ymm5); - - if(region) - { - vpor(uv, ymm6); - } - } - } - else - { - vbroadcasti128(ymm0, ptr[&m_local.gd->t.mask]); - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(ymm1, uv, ymm5); - - if(region) - { - vpor(ymm1, ymm6); - } - - // GSVector8i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv, ymm5); - vpminsw(uv, ymm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv, ymm1, ymm0); - } -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) -{ - // ymm5 = minuv - // ymm6 = maxuv - // ymm0, ymm1, ymm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - vpmaxsw(uv0, ymm5); - vpmaxsw(uv1, ymm5); - } - else - { - vpxor(ymm0, ymm0); - vpmaxsw(uv0, ymm0); - vpmaxsw(uv1, ymm0); - } - - vpminsw(uv0, ymm6); - vpminsw(uv1, ymm6); - } - else - { - vpand(uv0, ymm5); - vpand(uv1, ymm5); - - if(region) - { - vpor(uv0, ymm6); - vpor(uv1, ymm6); - } - } - } - else - { - vbroadcasti128(ymm0, ptr[&m_local.gd->t.mask]); - - // uv0 - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - 
vpand(ymm1, uv0, ymm5); - - if(region) - { - vpor(ymm1, ymm6); - } - - // GSVector8i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv0, ymm5); - vpminsw(uv0, ymm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv0, ymm1, ymm0); - - // uv1 - - // GSVector8i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - vpand(ymm1, uv1, ymm5); - - if(region) - { - vpor(ymm1, ymm6); - } - - // GSVector8i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - vpmaxsw(uv1, ymm5); - vpminsw(uv1, ymm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - vpblendvb(uv1, ymm1, ymm0); - } -} - -void GSDrawScanlineCodeGenerator::AlphaTFX() -{ - if(!m_sel.fb) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector8i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(ymm6, ymm4, 1); - - clamp16(ymm6, ymm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(ymm4, 7); - - mix16(ymm6, ymm4, ymm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - vpsrlw(ymm4, 7); - - mix16(ymm6, ymm4, ymm3); - } - - break; - - case TFX_HIGHLIGHT: - - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vmovdqa(ymm2, ymm4); - - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); - - vpsrlw(ymm4, 7); - - if(m_sel.tcc) - { - vpaddusb(ymm4, ymm6); - } - - mix16(ymm6, ymm4, ymm3); - - break; - - case TFX_HIGHLIGHT2: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? 
&m_local.temp.ga : &m_local.c.ga]); - vmovdqa(ymm2, ymm4); - - vpsrlw(ymm4, 7); - - mix16(ymm6, ymm4, ymm3); - } - - break; - - case TFX_NONE: - - // gat = iip ? ga.srl16(7) : ga; - - if(m_sel.iip) - { - vpsrlw(ymm6, 7); - } - - break; - } - - if(m_sel.aa1) - { - // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha - - // FIXME: bios config screen cubes - - if(!m_sel.abe) - { - // a = cov - - if(m_sel.edge) - { - vmovdqa(ymm0, ptr[&m_local.temp.cov]); - } - else - { - vpcmpeqd(ymm0, ymm0); - vpsllw(ymm0, 15); - vpsrlw(ymm0, 8); - } - - mix16(ymm6, ymm0, ymm1); - } - else - { - // a = a == 0x80 ? cov : a - - vpcmpeqd(ymm0, ymm0); - vpsllw(ymm0, 15); - vpsrlw(ymm0, 8); - - if(m_sel.edge) - { - vmovdqa(ymm1, ptr[&m_local.temp.cov]); - } - else - { - vmovdqa(ymm1, ymm0); - } - - vpcmpeqw(ymm0, ymm6); - vpsrld(ymm0, 16); - vpslld(ymm0, 16); - - vpblendvb(ymm6, ymm1, ymm0); - } - } -} - -void GSDrawScanlineCodeGenerator::ReadMask() -{ - if(m_sel.fwrite) - { - vpbroadcastd(ymm3, ptr[&m_local.gd->fm]); - } - - if(m_sel.zwrite) - { - vpbroadcastd(ymm4, ptr[&m_local.gd->zm]); - } -} - -void GSDrawScanlineCodeGenerator::TestAlpha() -{ - switch(m_sel.afail) - { - case AFAIL_FB_ONLY: - if(!m_sel.zwrite) return; - break; - - case AFAIL_ZB_ONLY: - if(!m_sel.fwrite) return; - break; - - case AFAIL_RGB_ONLY: - if(!m_sel.zwrite && m_sel.fpsm == 1) return; - break; - } - - switch(m_sel.atst) - { - case ATST_NEVER: - // t = GSVector8i::xffffffff(); - vpcmpeqd(ymm1, ymm1); - break; - - case ATST_ALWAYS: - return; - - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; - - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - vpcmpeqd(ymm0, ymm0); - vpxor(ymm1, ymm0); - break; - - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga 
>> 16) < m_local.gd->aref; - vpsrld(ymm0, ymm6, 16); - vbroadcasti128(ymm1, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; - - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - break; - } - - switch(m_sel.afail) - { - case AFAIL_KEEP: - // test |= t; - vpor(ymm7, ymm1); - alltrue(); - break; - - case AFAIL_FB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - break; - - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(ymm3, ymm1); - break; - - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - // fm |= t & GSVector8i::xff000000(); - vpsrld(ymm1, 24); - vpslld(ymm1, 24); - vpor(ymm3, ymm1); - break; - } -} - -void GSDrawScanlineCodeGenerator::ColorTFX() -{ - if(!m_sel.fwrite) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector8i rb = iip ? rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).clamp8(); - - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - clamp16(ymm5, ymm1); - - break; - - case TFX_DECAL: - - break; - - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector8i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } - - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - - vmovdqa(ymm1, ymm6); - - modulate16(ymm6, ymm2, 1); - - vpshuflw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(ymm2, 7); - - vpaddw(ymm6, ymm2); - - clamp16(ymm6, ymm0); - - mix16(ymm6, ymm1, ymm0); - - // GSVector8i rb = iip ? rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - vpaddw(ymm5, ymm2); - - clamp16(ymm5, ymm0); - - break; - - case TFX_NONE: - - // rbt = iip ? 
rb.srl16(7) : rb; - - if(m_sel.iip) - { - vpsrlw(ymm5, 7); - } - - break; - } -} - -void GSDrawScanlineCodeGenerator::Fog() -{ - if(!m_sel.fwrite || !m_sel.fge) - { - return; - } - - // rb = m_local.gd->frb.lerp16<0>(rb, f); - // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - vmovdqa(ymm0, ptr[&m_local.temp.f]); - } - else - { - vpbroadcastw(ymm0, ptr[&m_local.p.f]); - } - - vmovdqa(ymm1, ymm6); - - vpbroadcastd(ymm2, ptr[&m_local.gd->frb]); - lerp16(ymm5, ymm2, ymm0, 0); - - vpbroadcastd(ymm2, ptr[&m_local.gd->fga]); - lerp16(ymm6, ymm2, ymm0, 0); - mix16(ymm6, ymm1, ymm0); -} - -void GSDrawScanlineCodeGenerator::ReadFrame() -{ - if(!m_sel.fb) - { - return; - } - - // int fa = fza_base.x + fza_offset->x; - - mov(ebx, ptr[esi]); - add(ebx, ptr[edi]); - - if(!m_sel.rfb) - { - return; - } - - ReadPixel(ymm2, ymm0, ebx); -} - -void GSDrawScanlineCodeGenerator::TestDestAlpha() -{ - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) - { - return; - } - - // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - - if(m_sel.datm) - { - if(m_sel.fpsm == 2) - { - vpxor(ymm0, ymm0); - //vpsrld(ymm1, ymm2, 15); - vpslld(ymm1, ymm2, 16); - vpsrad(ymm1, 31); - vpcmpeqd(ymm1, ymm0); - } - else - { - vpcmpeqd(ymm0, ymm0); - vpxor(ymm1, ymm2, ymm0); - vpsrad(ymm1, 31); - } - } - else - { - if(m_sel.fpsm == 2) - { - vpslld(ymm1, ymm2, 16); - vpsrad(ymm1, 31); - } - else - { - vpsrad(ymm1, ymm2, 31); - } - } - - vpor(ymm7, ymm1); - - alltrue(); -} - -void GSDrawScanlineCodeGenerator::WriteMask() -{ - if(m_sel.notest) - { - return; - } - - // fm |= test; - // zm |= test; - - if(m_sel.fwrite) - { - vpor(ymm3, ymm7); - } - - if(m_sel.zwrite) - { - vpor(ymm4, ymm7); - } - - // int fzm = ~(fm == GSVector8i::xffffffff()).ps32(zm == GSVector8i::xffffffff()).mask(); - - vpcmpeqd(ymm1, ymm1); - - if(m_sel.fwrite && m_sel.zwrite) - { - vpcmpeqd(ymm0, ymm1, ymm4); - vpcmpeqd(ymm1, ymm3); - vpackssdw(ymm1, ymm0); - } - else if(m_sel.fwrite) - { - 
vpcmpeqd(ymm1, ymm3); - vpackssdw(ymm1, ymm1); - } - else if(m_sel.zwrite) - { - vpcmpeqd(ymm1, ymm4); - vpackssdw(ymm1, ymm1); - } - - vpmovmskb(edx, ymm1); - - not(edx); -} - -void GSDrawScanlineCodeGenerator::WriteZBuf() -{ - if(!m_sel.zwrite) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - vmovdqa(ymm1, ptr[&m_local.temp.zs]); - } - else - { - vpbroadcastd(ymm1, ptr[&m_local.p.z]); - } - - if(m_sel.ztest && m_sel.zpsm < 2) - { - // zs = zs.blend8(zd, zm); - - vpblendvb(ymm1, ptr[&m_local.temp.zd], ymm4); - } - - bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest; - - WritePixel(ymm1, ymm0, ebp, edx, fast, m_sel.zpsm, 1); -} - -void GSDrawScanlineCodeGenerator::AlphaBlend() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.abe == 0 && m_sel.aa1 == 0) - { - return; - } - - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) - { - switch(m_sel.fpsm) - { - case 0: - case 1: - - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; - - vpsllw(ymm0, ymm2, 8); - vpsrlw(ymm0, 8); - vpsrlw(ymm1, ymm2, 8); - - break; - - case 2: - - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - - vpcmpeqd(ymm7, ymm7); - - vpsrld(ymm7, 27); // 0x0000001f - vpand(ymm0, ymm2, ymm7); - vpslld(ymm0, 3); - - vpslld(ymm7, 10); // 0x00007c00 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 9); - - vpor(ymm0, ymm4); - - vpsrld(ymm7, 5); // 0x000003e0 - vpand(ymm1, ymm2, ymm7); - vpsrld(ymm1, 2); - - vpsllw(ymm7, 10); // 0x00008000 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 8); - - vpor(ymm1, ymm4); - - break; - } - } - - // ymm5, ymm6 = src rb, ga - // ymm0, ymm1 = dst rb, ga - // ymm2, ymm3 = used - // ymm4, ymm7 = free - - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) - { - vmovdqa(ymm4, ymm5); - } - - if(m_sel.aba != m_sel.abb) - { - // rb = c[aba * 2 + 0]; - - switch(m_sel.aba) - { - case 0: break; - case 1: 
vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; - } - - // rb = rb.sub16(c[abb * 2 + 0]); - - switch(m_sel.abb) - { - case 0: vpsubw(ymm5, ymm4); break; - case 1: vpsubw(ymm5, ymm0); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - - switch(m_sel.abc) - { - case 0: - case 1: - vpshuflw(ymm7, m_sel.abc ? ymm1 : ymm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm7, ymm7, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(ymm7, 7); - break; - case 2: - vpbroadcastw(ymm7, ptr[&m_local.gd->afix]); - break; - } - - // rb = rb.modulate16<1>(a); - - modulate16(ymm5, ymm7, 1); - } - - // rb = rb.add16(c[abd * 2 + 0]); - - switch(m_sel.abd) - { - case 0: vpaddw(ymm5, ymm4); break; - case 1: vpaddw(ymm5, ymm0); break; - case 2: break; - } - } - else - { - // rb = c[abd * 2 + 0]; - - switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; - } - } - - if(m_sel.pabe) - { - // mask = (c[1] << 8).sra32(31); - - vpslld(ymm0, ymm6, 8); - vpsrad(ymm0, 31); - - // rb = c[0].blend8(rb, mask); - - vpblendvb(ymm5, ymm4, ymm5, ymm0); - } - - // ymm6 = src ga - // ymm1 = dst ga - // ymm5 = rb - // ymm7 = a - // ymm2, ymm3 = used - // ymm0, ymm4 = free - - vmovdqa(ymm4, ymm6); - - if(m_sel.aba != m_sel.abb) - { - // ga = c[aba * 2 + 1]; - - switch(m_sel.aba) - { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; - } - - // ga = ga.sub16(c[abeb * 2 + 1]); - - switch(m_sel.abb) - { - case 0: vpsubw(ymm6, ymm4); break; - case 1: vpsubw(ymm6, ymm1); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // ga = ga.modulate16<1>(a); - - modulate16(ymm6, ymm7, 1); - } - - // ga = ga.add16(c[abd * 2 + 1]); - - switch(m_sel.abd) - { - case 0: vpaddw(ymm6, ymm4); break; - case 1: vpaddw(ymm6, ymm1); break; - case 2: break; - } - } - else - { - // ga = c[abd * 2 + 1]; - - 
switch(m_sel.abd) - { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; - } - } - - // ymm4 = src ga - // ymm5 = rb - // ymm6 = ga - // ymm2, ymm3 = used - // ymm0, ymm1, ymm7 = free - - if(m_sel.pabe) - { - vpsrld(ymm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) - - // ga = c[1].blend8(ga, mask).mix16(c[1]); - - vpblendvb(ymm6, ymm4, ymm6, ymm0); - } - else - { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx - { - mix16(ymm6, ymm4, ymm7); - } - } -} - -void GSDrawScanlineCodeGenerator::WriteFrame() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(eax, ptr[esp + _top]); - and(eax, 3); - shl(eax, 5); - mov(ebp, ptr[&m_local.gd->dimx]); - vbroadcasti128(ymm7, ptr[ebp + eax + sizeof(GSVector4i) * 0]); - vpaddw(ymm5, ymm7); - vbroadcasti128(ymm7, ptr[ebp + eax + sizeof(GSVector4i) * 1]); - vpaddw(ymm6, ymm7); - } - - if(m_sel.colclamp == 0) - { - // c[0] &= 0x00ff00ff; - // c[1] &= 0x00ff00ff; - - vpcmpeqd(ymm7, ymm7); - vpsrlw(ymm7, 8); - vpand(ymm5, ymm7); - vpand(ymm6, ymm7); - } - - // GSVector8i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); - - vpunpckhwd(ymm7, ymm5, ymm6); - vpunpcklwd(ymm5, ymm6); - vpackuswb(ymm5, ymm7); - - if(m_sel.fba && m_sel.fpsm != 1) - { - // fs |= 0x80000000; - - vpcmpeqd(ymm7, ymm7); - vpslld(ymm7, 31); - vpor(ymm5, ymm7); - } - - if(m_sel.fpsm == 2) - { - // GSVector8i rb = fs & 0x00f800f8; - // GSVector8i ga = fs & 0x8000f800; - - mov(eax, 0x00f800f8); - vmovd(xmm6, eax); - vpbroadcastd(ymm6, xmm6); - - mov(eax, 0x8000f800); - vmovd(xmm7, eax); - vpbroadcastd(ymm7, xmm7); - - vpand(ymm4, ymm5, ymm6); - vpand(ymm5, ymm7); - - // fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); - - vpsrld(ymm6, ymm4, 9); - vpsrld(ymm4, 3); - vpsrld(ymm7, ymm5, 16); - vpsrld(ymm5, 6); - - vpor(ymm5, ymm4); - vpor(ymm7, ymm6); - vpor(ymm5, ymm7); - } - - if(m_sel.rfb) - { - // fs = fs.blend(fd, fm); - - blend(ymm5, ymm2, 
ymm3); // TODO: could be skipped in certain cases, depending on fpsm and fm - } - - bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest; - - WritePixel(ymm5, ymm0, ebx, edx, fast, m_sel.fpsm, 0); -} - -void GSDrawScanlineCodeGenerator::ReadPixel(const Ymm& dst, const Ymm& temp, const Reg32& addr) -{ - vmovq(Xmm(dst.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm]); - vmovhps(Xmm(dst.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2]); - vmovq(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); - vmovhps(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 24 * 2]); - vinserti128(dst, dst, temp, 1); -/* - vmovdqu(dst, ptr[addr * 2 + (size_t)m_local.gd->vm]); - vmovdqu(temp, ptr[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); - vpunpcklqdq(dst, dst, temp); - vpermq(dst, dst, _MM_SHUFFLE(3, 1, 2, 0)); -*/ -} - -void GSDrawScanlineCodeGenerator::WritePixel(const Ymm& src, const Ymm& temp, const Reg32& addr, const Reg32& mask, bool fast, int psm, int fz) -{ - Xmm src1 = Xmm(src.getIdx()); - Xmm src2 = Xmm(temp.getIdx()); - - vextracti128(src2, src, 1); - - if(m_sel.notest) - { - if(fast) - { - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src1); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src1); - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm + 16 * 2], src2); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 24 * 2], src2); - } - else - { - WritePixel(src1, addr, 0, 0, psm); - WritePixel(src1, addr, 1, 1, psm); - WritePixel(src1, addr, 2, 2, psm); - WritePixel(src1, addr, 3, 3, psm); - WritePixel(src2, addr, 4, 0, psm); - WritePixel(src2, addr, 5, 1, psm); - WritePixel(src2, addr, 6, 2, psm); - WritePixel(src2, addr, 7, 3, psm); - } - } - else - { - // cascade tests? 
- - if(fast) - { - test(mask, 0x0000000f << (fz * 8)); - je("@f"); - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src1); - L("@@"); - - test(mask, 0x000000f0 << (fz * 8)); - je("@f"); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src1); - L("@@"); - - test(mask, 0x000f0000 << (fz * 8)); - je("@f"); - vmovq(qword[addr * 2 + (size_t)m_local.gd->vm + 16 * 2], src2); - L("@@"); - - test(mask, 0x00f00000 << (fz * 8)); - je("@f"); - vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 24 * 2], src2); - L("@@"); - - // vmaskmovps? - } - else - { - test(mask, 0x00000003 << (fz * 8)); - je("@f"); - WritePixel(src1, addr, 0, 0, psm); - L("@@"); - - test(mask, 0x0000000c << (fz * 8)); - je("@f"); - WritePixel(src1, addr, 1, 1, psm); - L("@@"); - - test(mask, 0x00000030 << (fz * 8)); - je("@f"); - WritePixel(src1, addr, 2, 2, psm); - L("@@"); - - test(mask, 0x000000c0 << (fz * 8)); - je("@f"); - WritePixel(src1, addr, 3, 3, psm); - L("@@"); - - test(mask, 0x00030000 << (fz * 8)); - je("@f"); - WritePixel(src2, addr, 4, 0, psm); - L("@@"); - - test(mask, 0x000c0000 << (fz * 8)); - je("@f"); - WritePixel(src2, addr, 5, 1, psm); - L("@@"); - - test(mask, 0x00300000 << (fz * 8)); - je("@f"); - WritePixel(src2, addr, 6, 2, psm); - L("@@"); - - test(mask, 0x00c00000 << (fz * 8)); - je("@f"); - WritePixel(src2, addr, 7, 3, psm); - L("@@"); - } - } -} - -static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, uint8 j, int psm) -{ - Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - - switch(psm) - { - case 0: - if(j == 0) vmovd(dst, src); - else vpextrd(dst, src, j); - break; - case 1: - if(j == 0) vmovd(eax, src); - else vpextrd(eax, src, j); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(j == 0) vmovd(eax, src); - else vpextrw(eax, src, j * 2); - mov(dst, ax); - break; - } -} - -void 
GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) -{ - // in - // ymm5 = addr00 - // ymm2 = addr01 - // ymm0 = addr10 - // ymm3 = addr11 - // ebx = m_local.tex[0] (!m_sel.mmin) - // ebp = m_local.tex (m_sel.mmin) - // edx = m_local.clut (m_sel.tlu) - - // out - // ymm6 = c00 - // ymm4 = c01 - // ymm1 = c10 - // ymm5 = c11 - - ASSERT(pixels == 1 || pixels == 4); - - mip_offset *= sizeof(void*); - - const GSVector8i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - - if(m_sel.mmin && !m_sel.lcm) - { - const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - - for(int i = 0; i < pixels; i++) - { - Ymm src = Ymm(r[i * 2 + 0]); - Ymm dst = Ymm(r[i * 2 + 1]); - Ymm t1 = Ymm(t[i * 2 + 0]); - Ymm t2 = Ymm(t[i * 2 + 1]); - - vextracti128(Xmm(t1.getIdx()), src, 1); - - for(uint8 j = 0; j < 4; j++) - { - mov(ebx, ptr[&lod_i->u32[j + 0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(dst, src, j); - - mov(ebx, ptr[&lod_i->u32[j + 4]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(t2, t1, j); - } - - vinserti128(dst, dst, t2, 1); - } - } - else - { - const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - - if(m_sel.mmin && m_sel.lcm) - { - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - } - - for(int i = 0; i < pixels; i++) - { - Ymm src = Ymm(r[i * 2 + 0]); - Ymm dst = Ymm(r[i * 2 + 1]); - Ymm t1 = Ymm(t[i * 2 + 0]); - Ymm t2 = Ymm(t[i * 2 + 1]); - - if(!m_sel.tlu) - { - vpcmpeqd(t1, t1); - vpgatherdd(dst, ptr[ebx + src * 4], t1); - } - else - { - vextracti128(Xmm(t1.getIdx()), src, 1); - - for(uint8 j = 0; j < 4; j++) - { - ReadTexel(dst, src, j); - ReadTexel(t2, t1, j); - } - - vinserti128(dst, dst, t2, 1); - /* - vpcmpeqd(t1, t1); - vpgatherdd(t2, ptr[ebx + src * 1], t1); // either this 1x scale, or the latency of two dependendent gathers are too slow - vpslld(t2, 24); - vpsrld(t2, 24); 
- vpcmpeqd(t1, t1); - vpgatherdd(dst, ptr[edx + t2 * 4], t1); - */ - } - } - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i) -{ - ASSERT(i < 4); - - const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - - if(i == 0) vmovd(eax, Xmm(addr.getIdx())); - else vpextrd(eax, Xmm(addr.getIdx()), i); - - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); - - if(i == 0) vmovd(Xmm(dst.getIdx()), src); - else vpinsrd(Xmm(dst.getIdx()), src, i); -} - - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.cpp deleted file mode 100644 index eb95e857ce..0000000000 --- a/plugins/GSdx_legacy/GSDrawScanlineCodeGenerator.x86.cpp +++ /dev/null @@ -1,3175 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDrawScanlineCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -static const int _args = 16; -static const int _top = _args + 4; -static const int _v = _args + 8; - -void GSDrawScanlineCodeGenerator::Generate() -{ - push(ebx); - push(esi); - push(edi); - push(ebp); - - Init(); - - if(!m_sel.edge) - { - align(16); - } - -L("loop"); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // xmm0 = z/zi - // xmm2 = s/u (tme) - // xmm3 = t/v (tme) - // xmm4 = q (tme) - // xmm5 = rb (!tme) - // xmm6 = ga (!tme) - // xmm7 = test - - bool tme = m_sel.tfx != TFX_NONE; - - TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - xmm0 - // xmm2 = s/u (tme) - // xmm3 = t/v (tme) - // xmm4 = q (tme) - // xmm5 = rb (!tme) - // xmm6 = ga (!tme) - // xmm7 = test - - if(m_sel.mmin) - { - SampleTextureLOD(); - } - else - { - SampleTexture(); - } - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // - xmm2 - // - xmm3 - // - xmm4 - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - AlphaTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ReadMask(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - TestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ColorTFX(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - Fog(); - - // ecx = steps - 
// esi = fzbr - // edi = fzbc - // ebp = za - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - ReadFrame(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - TestDestAlpha(); - - // ecx = steps - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - // xmm7 = test - - WriteMask(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // ebp = za - // xmm2 = fd - // xmm3 = fm - // xmm4 = zm - // xmm5 = rb - // xmm6 = ga - - WriteZBuf(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // - ebp - // xmm2 = fd - // xmm3 = fm - // - xmm4 - // xmm5 = rb - // xmm6 = ga - - AlphaBlend(); - - // ebx = fa - // ecx = steps - // edx = fzm - // esi = fzbr - // edi = fzbc - // xmm2 = fd - // xmm3 = fm - // xmm5 = rb - // xmm6 = ga - - WriteFrame(); - -L("step"); - - // if(steps <= 0) break; - - if(!m_sel.edge) - { - test(ecx, ecx); - - jle("exit", T_NEAR); - - Step(); - - jmp("loop", T_NEAR); - } - -L("exit"); - - // vzeroupper(); - - pop(ebp); - pop(edi); - pop(esi); - pop(ebx); - - ret(8); -} - -void GSDrawScanlineCodeGenerator::Init() -{ - if(!m_sel.notest) - { - // int skip = left & 3; - - mov(ebx, edx); - and(edx, 3); - - // int steps = pixels + skip - 4; - - lea(ecx, ptr[ecx + edx - 4]); - - // left -= skip; - - sub(ebx, edx); - - // GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))]; - - shl(edx, 4); - - movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]); - - mov(eax, ecx); - sar(eax, 31); - and(eax, ecx); - shl(eax, 4); - - por(xmm7, ptr[eax + (size_t)&m_test[7]]); - } - else - { - mov(ebx, edx); // left - xor(edx, edx); // skip - lea(ecx, ptr[ecx - 4]); // steps - } - - // GSVector2i* fza_base = &m_local.gd->fzbr[top]; - - mov(esi, ptr[esp + _top]); - lea(esi, ptr[esi * 8]); - add(esi, 
ptr[&m_local.gd->fzbr]); - - // GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2]; - - lea(edi, ptr[ebx * 2]); - add(edi, ptr[&m_local.gd->fzbc]); - - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) - { - // edx = &m_local.d[skip] - - lea(edx, ptr[edx * 8 + (size_t)m_local.d]); - - // ebx = &v - - mov(ebx, ptr[esp + _v]); - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) - { - movaps(xmm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - - if(m_sel.fwrite && m_sel.fge) - { - // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); - - cvttps2dq(xmm1, xmm0); - pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - paddw(xmm1, ptr[edx + offsetof(GSScanlineLocalData::skip, f)]); - - movdqa(ptr[&m_local.temp.f], xmm1); - } - - if(m_sel.zb) - { - // z = vp.zzzz() + m_local.d[skip].z; - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - movaps(ptr[&m_local.temp.z], xmm0); - movaps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]); - movaps(ptr[&m_local.temp.zo], xmm2); - addps(xmm0, xmm2); - } - } - } - else - { - if(m_sel.ztest) - { - movdqa(xmm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.edge || m_sel.tfx != TFX_NONE) - { - movaps(xmm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t - } - - if(m_sel.edge) - { - // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); - - pshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3)); - psrlw(xmm3, 9); - - movdqa(ptr[&m_local.temp.cov], xmm3); - } - - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i vti(vt); - - cvttps2dq(xmm6, xmm4); - - // s = vti.xxxx() + m_local.d[skip].s; - // t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t; - - pshufd(xmm2, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm3, xmm6, _MM_SHUFFLE(1, 1, 1, 1)); - - paddd(xmm2, ptr[edx + 
offsetof(GSScanlineLocalData::skip, s)]); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - } - else - { - if(m_sel.ltf) - { - pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm6, 12); - movdqa(ptr[&m_local.temp.vf], xmm6); - } - } - - movdqa(ptr[&m_local.temp.s], xmm2); - movdqa(ptr[&m_local.temp.t], xmm3); - } - else - { - // s = vt.xxxx() + m_local.d[skip].s; - // t = vt.yyyy() + m_local.d[skip].t; - // q = vt.zzzz() + m_local.d[skip].q; - - movaps(xmm2, xmm4); - movaps(xmm3, xmm4); - - shufps(xmm2, xmm2, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - - addps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - addps(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); - addps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]); - - movaps(ptr[&m_local.temp.s], xmm2); - movaps(ptr[&m_local.temp.t], xmm3); - movaps(ptr[&m_local.temp.q], xmm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i vc = GSVector4i(v.c); - - cvttps2dq(xmm6, ptr[ebx + offsetof(GSVertexSW, c)]); // v.c - - // vc = vc.upl16(vc.zwxy()); - - pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 0, 3, 2)); - punpcklwd(xmm6, xmm5); - - // rb = vc.xxxx().add16(m_local.d[skip].rb); - // ga = vc.zzzz().add16(m_local.d[skip].ga); - - pshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2)); - - paddw(xmm5, ptr[edx + offsetof(GSScanlineLocalData::skip, rb)]); - paddw(xmm6, ptr[edx + offsetof(GSScanlineLocalData::skip, ga)]); - - movdqa(ptr[&m_local.temp.rb], xmm5); - movdqa(ptr[&m_local.temp.ga], xmm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - movdqa(xmm5, ptr[&m_local.c.rb]); - movdqa(xmm6, ptr[&m_local.c.ga]); - } - } - } - } -} - -void GSDrawScanlineCodeGenerator::Step() -{ - // steps -= 4; - - sub(ecx, 4); - - 
// fza_offset++; - - add(edi, 8); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // z += m_local.d4.z; - - if(m_sel.zb) - { - movaps(xmm0, ptr[&m_local.temp.zo]); - addps(xmm0, ptr[&m_local.d4.z]); - movaps(ptr[&m_local.temp.zo], xmm0); - addps(xmm0, ptr[&m_local.temp.z]); - } - - // f = f.add16(m_local.d4.f); - - if(m_sel.fwrite && m_sel.fge) - { - movdqa(xmm1, ptr[&m_local.temp.f]); - paddw(xmm1, ptr[&m_local.d4.f]); - movdqa(ptr[&m_local.temp.f], xmm1); - } - } - else - { - if(m_sel.ztest) - { - movdqa(xmm0, ptr[&m_local.p.z]); - } - } - - if(m_sel.fb) - { - if(m_sel.tfx != TFX_NONE) - { - if(m_sel.fst) - { - // GSVector4i stq = m_local.d4.stq; - - // s += stq.xxxx(); - // if(!sprite) t += stq.yyyy(); - - movdqa(xmm4, ptr[&m_local.d4.stq]); - - pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - paddd(xmm2, ptr[&m_local.temp.s]); - movdqa(ptr[&m_local.temp.s], xmm2); - - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) - { - pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); - paddd(xmm3, ptr[&m_local.temp.t]); - movdqa(ptr[&m_local.temp.t], xmm3); - } - else - { - movdqa(xmm3, ptr[&m_local.temp.t]); - } - } - else - { - // GSVector4 stq = m_local.d4.stq; - - // s += stq.xxxx(); - // t += stq.yyyy(); - // q += stq.zzzz(); - - movaps(xmm4, ptr[&m_local.d4.stq]); - movaps(xmm2, xmm4); - movaps(xmm3, xmm4); - - shufps(xmm2, xmm2, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - - addps(xmm2, ptr[&m_local.temp.s]); - addps(xmm3, ptr[&m_local.temp.t]); - addps(xmm4, ptr[&m_local.temp.q]); - - movaps(ptr[&m_local.temp.s], xmm2); - movaps(ptr[&m_local.temp.t], xmm3); - movaps(ptr[&m_local.temp.q], xmm4); - } - } - - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) - { - if(m_sel.iip) - { - // GSVector4i c = m_local.d4.c; - - // rb = rb.add16(c.xxxx()); - // ga = ga.add16(c.yyyy()); - - movdqa(xmm7, ptr[&m_local.d4.c]); - - pshufd(xmm5, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm6, xmm7, _MM_SHUFFLE(1, 1, 1, 1)); - - 
paddw(xmm5, ptr[&m_local.temp.rb]); - paddw(xmm6, ptr[&m_local.temp.ga]); - - // FIXME: color may underflow and roll over at the end of the line, if decreasing - - pxor(xmm7, xmm7); - pmaxsw(xmm5, xmm7); - pmaxsw(xmm6, xmm7); - - movdqa(ptr[&m_local.temp.rb], xmm5); - movdqa(ptr[&m_local.temp.ga], xmm6); - } - else - { - if(m_sel.tfx == TFX_NONE) - { - movdqa(xmm5, ptr[&m_local.c.rb]); - movdqa(xmm6, ptr[&m_local.c.ga]); - } - } - } - } - - if(!m_sel.notest) - { - // test = m_test[7 + (steps & (steps >> 31))]; - - mov(edx, ecx); - sar(edx, 31); - and(edx, ecx); - shl(edx, 4); - - movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); - } -} - -void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) -{ - if(!m_sel.zb) - { - return; - } - - // int za = fza_base.y + fza_offset->y; - - mov(ebp, ptr[esi + 4]); - add(ebp, ptr[edi + 4]); - - // GSVector4i zs = zi; - - if(m_sel.prim != GS_SPRITE_CLASS) - { - if(m_sel.zoverflow) - { - // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - - movaps(temp1, ptr[&GSVector4::m_half]); - mulps(temp1, xmm0); - cvttps2dq(temp1, temp1); - pslld(temp1, 1); - - cvttps2dq(xmm0, xmm0); - pcmpeqd(temp2, temp2); - psrld(temp2, 31); - pand(xmm0, temp2); - - por(xmm0, temp1); - } - else - { - // zs = GSVector4i(z); - - cvttps2dq(xmm0, xmm0); - } - - if(m_sel.zwrite) - { - movdqa(ptr[&m_local.temp.zs], xmm0); - } - } - - if(m_sel.ztest) - { - ReadPixel(xmm1, ebp); - - if(m_sel.zwrite && m_sel.zpsm < 2) - { - movdqa(ptr[&m_local.temp.zd], xmm1); - } - - // zd &= 0xffffffff >> m_sel.zpsm * 8; - - if(m_sel.zpsm) - { - pslld(xmm1, m_sel.zpsm * 8); - psrld(xmm1, m_sel.zpsm * 8); - } - - if(m_sel.zoverflow || m_sel.zpsm == 0) - { - // GSVector4i o = GSVector4i::x80000000(); - - pcmpeqd(temp1, temp1); - pslld(temp1, 31); - - // GSVector4i zso = zs - o; - // GSVector4i zdo = zd - o; - - psubd(xmm0, temp1); - psubd(xmm1, temp1); - } - - switch(m_sel.ztst) - { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso 
>= zdo) - pcmpgtd(xmm1, xmm0); - por(xmm7, xmm1); - break; - - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - pcmpgtd(xmm0, xmm1); - pcmpeqd(temp1, temp1); - pxor(xmm0, temp1); - por(xmm7, xmm0); - break; - } - - alltrue(); - } -} - -void GSDrawScanlineCodeGenerator::SampleTexture() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - mov(ebx, ptr[&m_local.gd->tex[0]]); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - // ebx = tex - // edx = clut - - if(!m_sel.fst) - { - rcpps(xmm4, xmm4); - - mulps(xmm2, xmm4); - mulps(xmm3, xmm4); - - cvttps2dq(xmm2, xmm2); - cvttps2dq(xmm3, xmm3); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - movd(xmm4, eax); - pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - psubd(xmm2, xmm4); - psubd(xmm3, xmm4); - } - } - - // xmm2 = u - // xmm3 = v - - if(m_sel.ltf) - { - // GSVector4i uf = u.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.uf], xmm0); - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4i vf = v.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.vf], xmm0); - } - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - psrad(xmm2, 16); - psrad(xmm3, 16); - packssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - movdqa(xmm3, xmm2); - pcmpeqd(xmm1, xmm1); - psrlw(xmm1, 15); - paddw(xmm3, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - Wrap(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - Wrap(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i y0 = uv0.uph16() << tw; - // GSVector4i x0 = 
uv0.upl16(); - - pxor(xmm0, xmm0); - - movdqa(xmm4, xmm2); - punpckhwd(xmm2, xmm0); - punpcklwd(xmm4, xmm0); - pslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i y1 = uv1.uph16() << tw; - // GSVector4i x1 = uv1.upl16(); - - movdqa(xmm6, xmm3); - punpckhwd(xmm3, xmm0); - punpcklwd(xmm6, xmm0); - pslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - movdqa(xmm5, xmm2); - paddd(xmm5, xmm4); - paddd(xmm2, xmm6); - - movdqa(xmm0, xmm3); - paddd(xmm0, xmm4); - paddd(xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - movdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - movdqa(xmm2, xmm6); - psllw(xmm2, 8); - psrlw(xmm2, 8); - psrlw(xmm6, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - movdqa(xmm3, xmm4); - psllw(xmm3, 8); - psrlw(xmm3, 8); - psrlw(xmm4, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp_4(rb01, uf); - // ga00 = ga00.lerp_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = 
rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - // xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - movdqa(xmm2, xmm1); - psllw(xmm1, 8); - psrlw(xmm1, 8); - psrlw(xmm2, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - movdqa(xmm6, xmm5); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp_4(rb11, uf); - // ga10 = ga10.lerp_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp_4(rb10, vf); - // ga00 = ga00.lerp_4(ga10, vf); - - movdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - movdqa(xmm5, xmm6); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) -{ - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - pmaxsw(uv, ptr[&m_local.gd->t.min]); - } - else - { - pxor(xmm0, xmm0); - pmaxsw(uv, xmm0); - } - - pminsw(uv, ptr[&m_local.gd->t.max]); - } - else - { - pand(uv, ptr[&m_local.gd->t.min]); - - if(region) - { - por(uv, ptr[&m_local.gd->t.max]); - } - } - } - else - { - movdqa(xmm4, ptr[&m_local.gd->t.min]); - movdqa(xmm5, 
ptr[&m_local.gd->t.max]); - movdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, uv); - - pand(xmm1, xmm4); - - if(region) - { - por(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv, xmm4); - pminsw(uv, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - blend8(uv, xmm1); - } -} - -void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) -{ - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - movdqa(xmm4, ptr[&m_local.gd->t.min]); - pmaxsw(uv0, xmm4); - pmaxsw(uv1, xmm4); - } - else - { - pxor(xmm0, xmm0); - pmaxsw(uv0, xmm0); - pmaxsw(uv1, xmm0); - } - - movdqa(xmm5, ptr[&m_local.gd->t.max]); - pminsw(uv0, xmm5); - pminsw(uv1, xmm5); - } - else - { - movdqa(xmm4, ptr[&m_local.gd->t.min]); - pand(uv0, xmm4); - pand(uv1, xmm4); - - if(region) - { - movdqa(xmm5, ptr[&m_local.gd->t.max]); - por(uv0, xmm5); - por(uv1, xmm5); - } - } - } - else - { - movdqa(xmm4, ptr[&m_local.gd->t.min]); - movdqa(xmm5, ptr[&m_local.gd->t.max]); - - #if _M_SSE >= 0x401 - - movdqa(xmm0, ptr[&m_local.gd->t.mask]); - - #else - - movdqa(xmm0, ptr[&m_local.gd->t.invmask]); - movdqa(xmm6, xmm0); - - #endif - - // uv0 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, uv0); - - pand(xmm1, xmm4); - - if(region) - { - por(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv0, xmm4); - pminsw(uv0, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - #if _M_SSE >= 0x401 - - pblendvb(uv0, xmm1); - - #else - - blendr(uv0, xmm1, xmm0); - - #endif - - // uv1 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, 
uv1); - - pand(xmm1, xmm4); - - if(region) - { - por(xmm1, xmm5); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv1, xmm4); - pminsw(uv1, xmm5); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - #if _M_SSE >= 0x401 - - pblendvb(uv1, xmm1); - - #else - - blendr(uv1, xmm1, xmm6); - - #endif - } -} - -void GSDrawScanlineCodeGenerator::SampleTextureLOD() -{ - if(!m_sel.fb || m_sel.tfx == TFX_NONE) - { - return; - } - - push(ebp); - - mov(ebp, (size_t)m_local.gd->tex); - - if(m_sel.tlu) - { - mov(edx, ptr[&m_local.gd->clut]); - } - - if(!m_sel.fst) - { - rcpps(xmm0, xmm4); - - mulps(xmm2, xmm0); - mulps(xmm3, xmm0); - - cvttps2dq(xmm2, xmm2); - cvttps2dq(xmm3, xmm3); - } - - // xmm2 = u - // xmm3 = v - // xmm4 = q - // xmm0 = xmm1 = xmm5 = xmm6 = free - - // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - - if(!m_sel.lcm) - { - // store u/v - - movdqa(xmm0, xmm2); - punpckldq(xmm2, xmm3); - movdqa(ptr[&m_local.temp.uv[0]], xmm2); - punpckhdq(xmm0, xmm3); - movdqa(ptr[&m_local.temp.uv[1]], xmm0); - - // lod = -log2(Q) * (1 << L) + K - - movdqa(xmm0, xmm4); - pcmpeqd(xmm1, xmm1); - psrld(xmm1, 25); - pslld(xmm0, 1); - psrld(xmm0, 24); - psubd(xmm0, xmm1); - cvtdq2ps(xmm0, xmm0); - - // xmm0 = (float)(exp(q) - 127) - - pslld(xmm4, 9); - psrld(xmm4, 9); - orps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - - // xmm4 = mant(q) | 1.0f - - movdqa(xmm5, xmm4); - mulps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); - mulps(xmm5, xmm4); - subps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); - mulps(xmm4, xmm5); - addps(xmm4, xmm0); - - // xmm4 = log2(Q) = ((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0 - - mulps(xmm4, ptr[&m_local.gd->l]); - addps(xmm4, 
ptr[&m_local.gd->k]); - - // xmm4 = (-log2(Q) * (1 << L) + K) * 0x10000 - - xorps(xmm0, xmm0); - minps(xmm4, ptr[&m_local.gd->mxl]); - maxps(xmm4, xmm0); - cvtps2dq(xmm4, xmm4); - - if(m_sel.mmin == 1) // round-off mode - { - mov(eax, 0x8000); - movd(xmm0, eax); - pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - paddd(xmm4, xmm0); - } - - movdqa(xmm0, xmm4); - psrld(xmm4, 16); - movdqa(ptr[&m_local.temp.lod.i], xmm4); - - if(m_sel.mmin == 2) // trilinear mode - { - pshuflw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.temp.lod.f], xmm0); - } - - // shift u/v by (int)lod - - movq(xmm4, ptr[&m_local.gd->t.minmax]); - - movdqa(xmm2, ptr[&m_local.temp.uv[0]]); - movdqa(xmm5, xmm2); - movdqa(xmm3, ptr[&m_local.temp.uv[1]]); - movdqa(xmm6, xmm3); - - movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); - psrad(xmm2, xmm0); - movdqa(xmm1, xmm4); - psrlw(xmm1, xmm0); - movq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1); - - movd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]); - psrad(xmm5, xmm0); - movdqa(xmm1, xmm4); - psrlw(xmm1, xmm0); - movq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1); - - movd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]); - psrad(xmm3, xmm0); - movdqa(xmm1, xmm4); - psrlw(xmm1, xmm0); - movq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1); - - movd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]); - psrad(xmm6, xmm0); - movdqa(xmm1, xmm4); - psrlw(xmm1, xmm0); - movq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1); - - punpckldq(xmm2, xmm3); - punpckhdq(xmm5, xmm6); - movdqa(xmm3, xmm2); - punpckldq(xmm2, xmm5); - punpckhdq(xmm3, xmm5); - - movdqa(ptr[&m_local.temp.uv[0]], xmm2); - movdqa(ptr[&m_local.temp.uv[1]], xmm3); - - movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - - movdqa(xmm0, xmm5); - punpcklwd(xmm5, xmm6); - punpckhwd(xmm0, xmm6); - movdqa(xmm6, xmm5); - punpckldq(xmm5, xmm0); - punpckhdq(xmm6, xmm0); - - movdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5); - 
movdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6); - } - else - { - // lod = K - - movd(xmm0, ptr[&m_local.gd->lod.i.u32[0]]); - - psrad(xmm2, xmm0); - psrad(xmm3, xmm0); - - movdqa(ptr[&m_local.temp.uv[0]], xmm2); - movdqa(ptr[&m_local.temp.uv[1]], xmm3); - - movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - } - - // xmm2 = m_local.temp.uv[0] = u (level m) - // xmm3 = m_local.temp.uv[1] = v (level m) - // xmm5 = minuv - // xmm6 = maxuv - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - movd(xmm4, eax); - pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - psubd(xmm2, xmm4); - psubd(xmm3, xmm4); - - // GSVector4i uf = u.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.uf], xmm0); - - // GSVector4i vf = v.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.vf], xmm0); - } - - // GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - psrad(xmm2, 16); - psrad(xmm3, 16); - packssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - movdqa(xmm3, xmm2); - pcmpeqd(xmm1, xmm1); - psrlw(xmm1, 15); - paddw(xmm3, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i x0 = uv0.upl16(); - // GSVector4i y0 = uv0.uph16() << tw; - - pxor(xmm0, xmm0); - - movdqa(xmm4, xmm2); - punpckhwd(xmm2, xmm0); - punpcklwd(xmm4, xmm0); - pslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i x1 = uv1.upl16(); - // GSVector4i y1 = uv1.uph16() << 
tw; - - movdqa(xmm6, xmm3); - punpcklwd(xmm6, xmm0); - punpckhwd(xmm3, xmm0); - pslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - movdqa(xmm5, xmm2); - paddd(xmm5, xmm4); - paddd(xmm2, xmm6); - - movdqa(xmm0, xmm3); - paddd(xmm0, xmm4); - paddd(xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 0); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - movdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // GSVector4i ga00 = (c00 >> 8) & mask; - - movdqa(xmm2, xmm6); - psrlw(xmm6, 8); - psllw(xmm2, 8); - psrlw(xmm2, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - movdqa(xmm3, xmm4); - psrlw(xmm4, 8); - psllw(xmm3, 8); - psrlw(xmm3, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp_4(rb01, uf); - // ga00 = ga00.lerp_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - // xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - movdqa(xmm2, xmm1); - psllw(xmm1, 8); - psrlw(xmm1, 8); - psrlw(xmm2, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - 
movdqa(xmm6, xmm5); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp_4(rb11, uf); - // ga10 = ga10.lerp_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp_4(rb10, vf); - // ga00 = ga00.lerp_4(ga10, vf); - - movdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 0); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - movdqa(xmm5, xmm6); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - } - - if(m_sel.mmin != 1) // !round-off mode - { - movdqa(ptr[&m_local.temp.trb], xmm5); - movdqa(ptr[&m_local.temp.tga], xmm6); - - movdqa(xmm2, ptr[&m_local.temp.uv[0]]); - movdqa(xmm3, ptr[&m_local.temp.uv[1]]); - - psrad(xmm2, 1); - psrad(xmm3, 1); - - movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); - movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); - - psrlw(xmm5, 1); - psrlw(xmm6, 1); - - if(m_sel.ltf) - { - // u -= 0x8000; - // v -= 0x8000; - - mov(eax, 0x8000); - movd(xmm4, eax); - pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - - psubd(xmm2, xmm4); - psubd(xmm3, xmm4); - - // GSVector4i uf = u.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.uf], xmm0); - - // GSVector4i vf = v.xxzzlh().srl16(1); - - pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 12); - movdqa(ptr[&m_local.temp.vf], xmm0); - } - - // 
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); - - psrad(xmm2, 16); - psrad(xmm3, 16); - packssdw(xmm2, xmm3); - - if(m_sel.ltf) - { - // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); - - movdqa(xmm3, xmm2); - pcmpeqd(xmm1, xmm1); - psrlw(xmm1, 15); - paddw(xmm3, xmm1); - - // uv0 = Wrap(uv0); - // uv1 = Wrap(uv1); - - WrapLOD(xmm2, xmm3); - } - else - { - // uv0 = Wrap(uv0); - - WrapLOD(xmm2); - } - - // xmm2 = uv0 - // xmm3 = uv1 (ltf) - // xmm0, xmm1, xmm4, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i x0 = uv0.upl16(); - // GSVector4i y0 = uv0.uph16() << tw; - - pxor(xmm0, xmm0); - - movdqa(xmm4, xmm2); - punpckhwd(xmm2, xmm0); - punpcklwd(xmm4, xmm0); - pslld(xmm2, m_sel.tw + 3); - - // xmm0 = 0 - // xmm2 = y0 - // xmm3 = uv1 (ltf) - // xmm4 = x0 - // xmm1, xmm5, xmm6 = free - // xmm7 = used - - if(m_sel.ltf) - { - // GSVector4i x1 = uv1.upl16(); - // GSVector4i y1 = uv1.uph16() << tw; - - movdqa(xmm6, xmm3); - punpckhwd(xmm3, xmm0); - punpcklwd(xmm6, xmm0); - pslld(xmm3, m_sel.tw + 3); - - // xmm2 = y0 - // xmm3 = y1 - // xmm4 = x0 - // xmm6 = x1 - // xmm0, xmm5, xmm6 = free - // xmm7 = used - - // GSVector4i addr00 = y0 + x0; - // GSVector4i addr01 = y0 + x1; - // GSVector4i addr10 = y1 + x0; - // GSVector4i addr11 = y1 + x1; - - movdqa(xmm5, xmm2); - paddd(xmm5, xmm4); - paddd(xmm2, xmm6); - - movdqa(xmm0, xmm3); - paddd(xmm0, xmm4); - paddd(xmm3, xmm6); - - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // xmm1, xmm4, xmm6 = free - // xmm7 = used - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - // c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]); - // c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]); - // c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(4, 1); - - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - // xmm0, xmm2, xmm3 = free - // xmm7 = used - - movdqa(xmm0, ptr[&m_local.temp.uf]); - - // GSVector4i rb00 = c00 & mask; - // 
GSVector4i ga00 = (c00 >> 8) & mask; - - movdqa(xmm2, xmm6); - psllw(xmm2, 8); - psrlw(xmm2, 8); - psrlw(xmm6, 8); - - // GSVector4i rb01 = c01 & mask; - // GSVector4i ga01 = (c01 >> 8) & mask; - - movdqa(xmm3, xmm4); - psllw(xmm3, 8); - psrlw(xmm3, 8); - psrlw(xmm4, 8); - - // xmm0 = uf - // xmm2 = rb00 - // xmm3 = rb01 - // xmm6 = ga00 - // xmm4 = ga01 - // xmm1 = c10 - // xmm5 = c11 - // xmm7 = used - - // rb00 = rb00.lerp_4(rb01, uf); - // ga00 = ga00.lerp_4(ga01, uf); - - lerp16_4(xmm3, xmm2, xmm0); - lerp16_4(xmm4, xmm6, xmm0); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = c10 - // xmm5 = c11 - // xmm2, xmm6 = free - // xmm7 = used - - // GSVector4i rb10 = c10 & mask; - // GSVector4i ga10 = (c10 >> 8) & mask; - - movdqa(xmm2, xmm1); - psllw(xmm1, 8); - psrlw(xmm1, 8); - psrlw(xmm2, 8); - - // GSVector4i rb11 = c11 & mask; - // GSVector4i ga11 = (c11 >> 8) & mask; - - movdqa(xmm6, xmm5); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - - // xmm0 = uf - // xmm3 = rb00 - // xmm4 = ga00 - // xmm1 = rb10 - // xmm5 = rb11 - // xmm2 = ga10 - // xmm6 = ga11 - // xmm7 = used - - // rb10 = rb10.lerp_4(rb11, uf); - // ga10 = ga10.lerp_4(ga11, uf); - - lerp16_4(xmm5, xmm1, xmm0); - lerp16_4(xmm6, xmm2, xmm0); - - // xmm3 = rb00 - // xmm4 = ga00 - // xmm5 = rb10 - // xmm6 = ga10 - // xmm0, xmm1, xmm2 = free - // xmm7 = used - - // rb00 = rb00.lerp_4(rb10, vf); - // ga00 = ga00.lerp_4(ga10, vf); - - movdqa(xmm0, ptr[&m_local.temp.vf]); - - lerp16_4(xmm5, xmm3, xmm0); - lerp16_4(xmm6, xmm4, xmm0); - } - else - { - // GSVector4i addr00 = y0 + x0; - - paddd(xmm2, xmm4); - movdqa(xmm5, xmm2); - - // c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]); - - ReadTexel(1, 1); - - // GSVector4i mask = GSVector4i::x00ff(); - - // c[0] = c00 & mask; - // c[1] = (c00 >> 8) & mask; - - movdqa(xmm5, xmm6); - psllw(xmm5, 8); - psrlw(xmm5, 8); - psrlw(xmm6, 8); - } - - movdqa(xmm0, ptr[m_sel.lcm ? 
&m_local.gd->lod.f : &m_local.temp.lod.f]); - psrlw(xmm0, 1); - - movdqa(xmm2, ptr[&m_local.temp.trb]); - movdqa(xmm3, ptr[&m_local.temp.tga]); - - lerp16(xmm5, xmm2, xmm0, 0); - lerp16(xmm6, xmm3, xmm0, 0); - } - - pop(ebp); -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv) -{ - // xmm5 = minuv - // xmm6 = maxuv - // xmm0, xmm1, xmm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - pmaxsw(uv, xmm5); - } - else - { - pxor(xmm0, xmm0); - pmaxsw(uv, xmm0); - } - - pminsw(uv, xmm6); - } - else - { - pand(uv, xmm5); - - if(region) - { - por(uv, xmm6); - } - } - } - else - { - movdqa(xmm0, ptr[&m_local.gd->t.mask]); - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, uv); - - pand(xmm1, xmm5); - - if(region) - { - por(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv, xmm5); - pminsw(uv, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - blend8(uv, xmm1); - } -} - -void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1) -{ - // xmm5 = minuv - // xmm6 = maxuv - // xmm0, xmm1, xmm4 = free - - int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - - int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - - if(wms_clamp == wmt_clamp) - { - if(wms_clamp) - { - if(region) - { - pmaxsw(uv0, xmm5); - pmaxsw(uv1, xmm5); - } - else - { - pxor(xmm0, xmm0); - pmaxsw(uv0, xmm0); - pmaxsw(uv1, xmm0); - } - - pminsw(uv0, xmm6); - pminsw(uv1, xmm6); - } - else - { - pand(uv0, xmm5); - pand(uv1, xmm5); - - if(region) - { - por(uv0, xmm6); - por(uv1, xmm6); - } - } - } - else - { - #if _M_SSE >= 0x401 - - movdqa(xmm0, ptr[&m_local.gd->t.mask]); - - #else - - movdqa(xmm0, ptr[&m_local.gd->t.invmask]); - movdqa(xmm4, xmm0); - - #endif - - // uv0 - - // 
GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, uv0); - - pand(xmm1, xmm5); - - if(region) - { - por(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv0, xmm5); - pminsw(uv0, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - #if _M_SSE >= 0x401 - - pblendvb(uv0, xmm1); - - #else - - blendr(uv0, xmm1, xmm0); - - #endif - - // uv1 - - // GSVector4i repeat = (t & m_local.gd->t.min) | m_local.gd->t.max; - - movdqa(xmm1, uv1); - - pand(xmm1, xmm5); - - if(region) - { - por(xmm1, xmm6); - } - - // GSVector4i clamp = t.sat_i16(m_local.gd->t.min, m_local.gd->t.max); - - pmaxsw(uv1, xmm5); - pminsw(uv1, xmm6); - - // clamp.blend8(repeat, m_local.gd->t.mask); - - #if _M_SSE >= 0x401 - - pblendvb(uv1, xmm1); - - #else - - blendr(uv1, xmm1, xmm4); - - #endif - } -} - -void GSDrawScanlineCodeGenerator::AlphaTFX() -{ - if(!m_sel.fb) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(xmm6, xmm4, 1); - - clamp16(xmm6, xmm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - psrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - psrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_HIGHLIGHT: - - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - movdqa(xmm2, xmm4); - - // gat = gat.mix16(!tcc ? 
ga.srl16(7) : gat.addus8(ga.srl16(7))); - - psrlw(xmm4, 7); - - if(m_sel.tcc) - { - paddusb(xmm4, xmm6); - } - - mix16(xmm6, xmm4, xmm3); - - break; - - case TFX_HIGHLIGHT2: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - movdqa(xmm2, xmm4); - - psrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_NONE: - - // gat = iip ? ga.srl16(7) : ga; - - if(m_sel.iip) - { - psrlw(xmm6, 7); - } - - break; - } - - if(m_sel.aa1) - { - // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha - - // FIXME: bios config screen cubes - - if(!m_sel.abe) - { - // a = cov - - if(m_sel.edge) - { - movdqa(xmm0, ptr[&m_local.temp.cov]); - } - else - { - pcmpeqd(xmm0, xmm0); - psllw(xmm0, 15); - psrlw(xmm0, 8); - } - - mix16(xmm6, xmm0, xmm1); - } - else - { - // a = a == 0x80 ? cov : a - - pcmpeqd(xmm0, xmm0); - psllw(xmm0, 15); - psrlw(xmm0, 8); - - if(m_sel.edge) - { - movdqa(xmm1, ptr[&m_local.temp.cov]); - } - else - { - movdqa(xmm1, xmm0); - } - - pcmpeqw(xmm0, xmm6); - psrld(xmm0, 16); - pslld(xmm0, 16); - - blend8(xmm6, xmm1); - } - } -} - -void GSDrawScanlineCodeGenerator::ReadMask() -{ - if(m_sel.fwrite) - { - movdqa(xmm3, ptr[&m_local.gd->fm]); - } - - if(m_sel.zwrite) - { - movdqa(xmm4, ptr[&m_local.gd->zm]); - } -} - -void GSDrawScanlineCodeGenerator::TestAlpha() -{ - switch(m_sel.afail) - { - case AFAIL_FB_ONLY: - if(!m_sel.zwrite) return; - break; - - case AFAIL_ZB_ONLY: - if(!m_sel.fwrite) return; - break; - - case AFAIL_RGB_ONLY: - if(!m_sel.zwrite && m_sel.fpsm == 1) return; - break; - } - - switch(m_sel.atst) - { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - pcmpeqd(xmm1, xmm1); - break; - - case ATST_ALWAYS: - return; - - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpgtd(xmm1, ptr[&m_local.gd->aref]); - break; 
- - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpeqd(xmm1, ptr[&m_local.gd->aref]); - pcmpeqd(xmm0, xmm0); - pxor(xmm1, xmm0); - break; - - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - movdqa(xmm0, xmm6); - psrld(xmm0, 16); - movdqa(xmm1, ptr[&m_local.gd->aref]); - pcmpgtd(xmm1, xmm0); - break; - - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpeqd(xmm1, ptr[&m_local.gd->aref]); - break; - } - - switch(m_sel.afail) - { - case AFAIL_KEEP: - // test |= t; - por(xmm7, xmm1); - alltrue(); - break; - - case AFAIL_FB_ONLY: - // zm |= t; - por(xmm4, xmm1); - break; - - case AFAIL_ZB_ONLY: - // fm |= t; - por(xmm3, xmm1); - break; - - case AFAIL_RGB_ONLY: - // zm |= t; - por(xmm4, xmm1); - // fm |= t & GSVector4i::xff000000(); - psrld(xmm1, 24); - pslld(xmm1, 24); - por(xmm3, xmm1); - break; - } -} - -void GSDrawScanlineCodeGenerator::ColorTFX() -{ - if(!m_sel.fwrite) - { - return; - } - - switch(m_sel.tfx) - { - case TFX_MODULATE: - - // GSVector4i rb = iip ? rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).clamp8(); - - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - clamp16(xmm5, xmm1); - - break; - - case TFX_DECAL: - - break; - - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } - - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - - movdqa(xmm1, xmm6); - - modulate16(xmm6, xmm2, 1); - - pshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - psrlw(xmm2, 7); - - paddw(xmm6, xmm2); - - clamp16(xmm6, xmm0); - - mix16(xmm6, xmm1, xmm0); - - // GSVector4i rb = iip ? 
rbf : m_local.c.rb; - - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - - paddw(xmm5, xmm2); - - clamp16(xmm5, xmm0); - - break; - - case TFX_NONE: - - // rbt = iip ? rb.srl16(7) : rb; - - if(m_sel.iip) - { - psrlw(xmm5, 7); - } - - break; - } -} - -void GSDrawScanlineCodeGenerator::Fog() -{ - if(!m_sel.fwrite || !m_sel.fge) - { - return; - } - - // rb = m_local.gd->frb.lerp16<0>(rb, f); - // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - - movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]); - movdqa(xmm1, xmm6); - - movdqa(xmm2, ptr[&m_local.gd->frb]); - lerp16(xmm5, xmm2, xmm0, 0); - - movdqa(xmm2, ptr[&m_local.gd->fga]); - lerp16(xmm6, xmm2, xmm0, 0); - mix16(xmm6, xmm1, xmm0); -} - -void GSDrawScanlineCodeGenerator::ReadFrame() -{ - if(!m_sel.fb) - { - return; - } - - // int fa = fza_base.x + fza_offset->x; - - mov(ebx, ptr[esi]); - add(ebx, ptr[edi]); - - if(!m_sel.rfb) - { - return; - } - - ReadPixel(xmm2, ebx); -} - -void GSDrawScanlineCodeGenerator::TestDestAlpha() -{ - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) - { - return; - } - - // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - - movdqa(xmm1, xmm2); - - if(m_sel.datm) - { - if(m_sel.fpsm == 2) - { - pxor(xmm0, xmm0); - psrld(xmm1, 15); - pcmpeqd(xmm1, xmm0); - } - else - { - pcmpeqd(xmm0, xmm0); - pxor(xmm1, xmm0); - psrad(xmm1, 31); - } - } - else - { - if(m_sel.fpsm == 2) - { - pslld(xmm1, 16); - } - - psrad(xmm1, 31); - } - - por(xmm7, xmm1); - - alltrue(); -} - -void GSDrawScanlineCodeGenerator::WriteMask() -{ - if(m_sel.notest) - { - return; - } - - // fm |= test; - // zm |= test; - - if(m_sel.fwrite) - { - por(xmm3, xmm7); - } - - if(m_sel.zwrite) - { - por(xmm4, xmm7); - } - - // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); - - pcmpeqd(xmm1, xmm1); - - if(m_sel.fwrite && m_sel.zwrite) - { - movdqa(xmm0, xmm1); - 
pcmpeqd(xmm1, xmm3); - pcmpeqd(xmm0, xmm4); - packssdw(xmm1, xmm0); - } - else if(m_sel.fwrite) - { - pcmpeqd(xmm1, xmm3); - packssdw(xmm1, xmm1); - } - else if(m_sel.zwrite) - { - pcmpeqd(xmm1, xmm4); - packssdw(xmm1, xmm1); - } - - pmovmskb(edx, xmm1); - - not(edx); -} - -void GSDrawScanlineCodeGenerator::WriteZBuf() -{ - if(!m_sel.zwrite) - { - return; - } - - movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); - - if(m_sel.ztest && m_sel.zpsm < 2) - { - // zs = zs.blend8(zd, zm); - - movdqa(xmm0, xmm4); - movdqa(xmm7, ptr[&m_local.temp.zd]); - blend8(xmm1, xmm7); - } - - bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest; - - WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1); -} - -void GSDrawScanlineCodeGenerator::AlphaBlend() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.abe == 0 && m_sel.aa1 == 0) - { - return; - } - - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) - { - switch(m_sel.fpsm) - { - case 0: - case 1: - - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; - - movdqa(xmm0, xmm2); - movdqa(xmm1, xmm2); - - psllw(xmm0, 8); - psrlw(xmm0, 8); - psrlw(xmm1, 8); - - break; - - case 2: - - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - - movdqa(xmm0, xmm2); - movdqa(xmm1, xmm2); - movdqa(xmm4, xmm2); - - pcmpeqd(xmm7, xmm7); - psrld(xmm7, 27); // 0x0000001f - pand(xmm0, xmm7); - pslld(xmm0, 3); - - pslld(xmm7, 10); // 0x00007c00 - pand(xmm4, xmm7); - pslld(xmm4, 9); - - por(xmm0, xmm4); - - movdqa(xmm4, xmm1); - - psrld(xmm7, 5); // 0x000003e0 - pand(xmm1, xmm7); - psrld(xmm1, 2); - - psllw(xmm7, 10); // 0x00008000 - pand(xmm4, xmm7); - pslld(xmm4, 8); - - por(xmm1, xmm4); - - break; - } - } - - // xmm5, xmm6 = src rb, ga - // xmm0, xmm1 = dst rb, ga - // xmm2, xmm3 = used - // xmm4, xmm7 = free - - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd 
== 0)) - { - movdqa(xmm4, xmm5); - } - - if(m_sel.aba != m_sel.abb) - { - // rb = c[aba * 2 + 0]; - - switch(m_sel.aba) - { - case 0: break; - case 1: movdqa(xmm5, xmm0); break; - case 2: pxor(xmm5, xmm5); break; - } - - // rb = rb.sub16(c[abb * 2 + 0]); - - switch(m_sel.abb) - { - case 0: psubw(xmm5, xmm4); break; - case 1: psubw(xmm5, xmm0); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - - switch(m_sel.abc) - { - case 0: - case 1: - pshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); - psllw(xmm7, 7); - break; - case 2: - movdqa(xmm7, ptr[&m_local.gd->afix]); - break; - } - - // rb = rb.modulate16<1>(a); - - modulate16(xmm5, xmm7, 1); - } - - // rb = rb.add16(c[abd * 2 + 0]); - - switch(m_sel.abd) - { - case 0: paddw(xmm5, xmm4); break; - case 1: paddw(xmm5, xmm0); break; - case 2: break; - } - } - else - { - // rb = c[abd * 2 + 0]; - - switch(m_sel.abd) - { - case 0: break; - case 1: movdqa(xmm5, xmm0); break; - case 2: pxor(xmm5, xmm5); break; - } - } - - if(m_sel.pabe) - { - // mask = (c[1] << 8).sra32(31); - - movdqa(xmm0, xmm6); - pslld(xmm0, 8); - psrad(xmm0, 31); - - // rb = c[0].blend8(rb, mask); - - blend8r(xmm5, xmm4); - } - - // xmm6 = src ga - // xmm1 = dst ga - // xmm5 = rb - // xmm7 = a - // xmm2, xmm3 = used - // xmm0, xmm4 = free - - movdqa(xmm4, xmm6); - - if(m_sel.aba != m_sel.abb) - { - // ga = c[aba * 2 + 1]; - - switch(m_sel.aba) - { - case 0: break; - case 1: movdqa(xmm6, xmm1); break; - case 2: pxor(xmm6, xmm6); break; - } - - // ga = ga.sub16(c[abeb * 2 + 1]); - - switch(m_sel.abb) - { - case 0: psubw(xmm6, xmm4); break; - case 1: psubw(xmm6, xmm1); break; - case 2: break; - } - - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) - { - // ga = ga.modulate16<1>(a); - - modulate16(xmm6, xmm7, 1); - } - - // ga = ga.add16(c[abd * 2 + 1]); - - switch(m_sel.abd) - { - case 0: 
paddw(xmm6, xmm4); break; - case 1: paddw(xmm6, xmm1); break; - case 2: break; - } - } - else - { - // ga = c[abd * 2 + 1]; - - switch(m_sel.abd) - { - case 0: break; - case 1: movdqa(xmm6, xmm1); break; - case 2: pxor(xmm6, xmm6); break; - } - } - - // xmm4 = src ga - // xmm5 = rb - // xmm6 = ga - // xmm2, xmm3 = used - // xmm0, xmm1, xmm7 = free - - if(m_sel.pabe) - { - #if _M_SSE < 0x401 - - // doh, previous blend8r overwrote xmm0 (sse41 uses pblendvb) - - movdqa(xmm0, xmm4); - pslld(xmm0, 8); - psrad(xmm0, 31); - - #endif - - psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) - - // ga = c[1].blend8(ga, mask).mix16(c[1]); - - blend8r(xmm6, xmm4); - } - else - { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx - { - mix16(xmm6, xmm4, xmm7); - } - } -} - -void GSDrawScanlineCodeGenerator::WriteFrame() -{ - if(!m_sel.fwrite) - { - return; - } - - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(eax, ptr[esp + _top]); - and(eax, 3); - shl(eax, 5); - mov(ebp, ptr[&m_local.gd->dimx]); - paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); - paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); - } - - if(m_sel.colclamp == 0) - { - // c[0] &= 0x000000ff; - // c[1] &= 0x000000ff; - - pcmpeqd(xmm7, xmm7); - psrlw(xmm7, 8); - pand(xmm5, xmm7); - pand(xmm6, xmm7); - } - - // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); - - movdqa(xmm7, xmm5); - punpcklwd(xmm5, xmm6); - punpckhwd(xmm7, xmm6); - packuswb(xmm5, xmm7); - - if(m_sel.fba && m_sel.fpsm != 1) - { - // fs |= 0x80000000; - - pcmpeqd(xmm7, xmm7); - pslld(xmm7, 31); - por(xmm5, xmm7); - } - - if(m_sel.fpsm == 2) - { - // GSVector4i rb = fs & 0x00f800f8; - // GSVector4i ga = fs & 0x8000f800; - - mov(eax, 0x00f800f8); - movd(xmm6, eax); - pshufd(xmm6, xmm6, _MM_SHUFFLE(0, 0, 0, 0)); - - mov(eax, 0x8000f800); - movd(xmm7, eax); - pshufd(xmm7, xmm7, _MM_SHUFFLE(0, 0, 0, 0)); - - movdqa(xmm4, xmm5); - pand(xmm4, xmm6); - pand(xmm5, xmm7); - - // fs = (ga >> 16) | (rb 
>> 9) | (ga >> 6) | (rb >> 3); - - movdqa(xmm6, xmm4); - movdqa(xmm7, xmm5); - - psrld(xmm4, 3); - psrld(xmm6, 9); - psrld(xmm5, 6); - psrld(xmm7, 16); - - por(xmm5, xmm4); - por(xmm7, xmm6); - por(xmm5, xmm7); - } - - if(m_sel.rfb) - { - // fs = fs.blend(fd, fm); - - blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm - } - - bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest; - - WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0); -} - -void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr) -{ - movq(dst, qword[addr * 2 + (size_t)m_local.gd->vm]); - movhps(dst, qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2]); -} - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz) -{ - if(m_sel.notest) - { - if(fast) - { - movq(qword[addr * 2 + (size_t)m_local.gd->vm], src); - movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); - } - else - { - WritePixel(src, addr, 0, psm); - WritePixel(src, addr, 1, psm); - WritePixel(src, addr, 2, psm); - WritePixel(src, addr, 3, psm); - } - } - else - { - if(fast) - { - // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); - // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); - - test(mask, 0x0f); - je("@f"); - movq(qword[addr * 2 + (size_t)m_local.gd->vm], src); - L("@@"); - - test(mask, 0xf0); - je("@f"); - movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); - L("@@"); - } - else - { - // if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>()); - // if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>()); - // if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>()); - // if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>()); - - test(mask, 0x03); - je("@f"); - WritePixel(src, addr, 0, psm); - L("@@"); - - test(mask, 0x0c); - je("@f"); - WritePixel(src, addr, 1, psm); - L("@@"); - - test(mask, 
0x30); - je("@f"); - WritePixel(src, addr, 2, psm); - L("@@"); - - test(mask, 0xc0); - je("@f"); - WritePixel(src, addr, 3, psm); - L("@@"); - } - } -} - -static const int s_offsets[4] = {0, 2, 8, 10}; - -void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm) -{ - Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - - switch(psm) - { - case 0: - if(i == 0) movd(dst, src); - #if _M_SSE >= 0x401 - else pextrd(dst, src, i); - #else - else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);} - #endif - break; - case 1: - if(i == 0) movd(eax, src); - #if _M_SSE >= 0x401 - else pextrd(eax, src, i); - #else - else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);} - #endif - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(i == 0) movd(eax, src); - else pextrw(eax, src, i * 2); - mov(dst, ax); - break; - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) -{ - // in - // xmm5 = addr00 - // xmm2 = addr01 - // xmm0 = addr10 - // xmm3 = addr11 - // ebx = m_local.tex[0] (!m_sel.mmin) - // ebp = m_local.tex (m_sel.mmin) - // edx = m_local.clut (m_sel.tlu) - - // out - // xmm6 = c00 - // xmm4 = c01 - // xmm1 = c10 - // xmm5 = c11 - - ASSERT(pixels == 1 || pixels == 4); - - mip_offset *= sizeof(void*); - - const GSVector4i* lod_i = m_sel.lcm ? 
&m_local.gd->lod.i : &m_local.temp.lod.i; - - if(m_sel.mmin && !m_sel.lcm) - { - #if _M_SSE >= 0x401 - - const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; - - if(pixels == 4) - { - movdqa(ptr[&m_local.temp.test], xmm7); - } - - for(int j = 0; j < 4; j++) - { - mov(ebx, ptr[&lod_i->u32[j]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - for(int i = 0; i < pixels; i++) - { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } - - if(pixels == 4) - { - movdqa(xmm5, xmm7); - movdqa(xmm7, ptr[&m_local.temp.test]); - } - - #else - - if(pixels == 4) - { - movdqa(ptr[&m_local.temp.test], xmm7); - - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm6, xmm5, 0); - psrldq(xmm5, 4); - ReadTexel(xmm4, xmm2, 0); - psrldq(xmm2, 4); - - mov(ebx, ptr[&lod_i->u32[1]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm1, xmm5, 0); - psrldq(xmm5, 4); - ReadTexel(xmm7, xmm2, 0); - psrldq(xmm2, 4); - - punpckldq(xmm6, xmm1); - punpckldq(xmm4, xmm7); - - mov(ebx, ptr[&lod_i->u32[2]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm1, xmm5, 0); - psrldq(xmm5, 4); - ReadTexel(xmm7, xmm2, 0); - psrldq(xmm2, 4); - - mov(ebx, ptr[&lod_i->u32[3]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm5, xmm5, 0); - ReadTexel(xmm2, xmm2, 0); - - punpckldq(xmm1, xmm5); - punpckldq(xmm7, xmm2); - - punpcklqdq(xmm6, xmm1); - punpcklqdq(xmm4, xmm7); - - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm1, xmm0, 0); - psrldq(xmm0, 4); - ReadTexel(xmm5, xmm3, 0); - psrldq(xmm3, 4); - - mov(ebx, ptr[&lod_i->u32[1]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm2, xmm0, 0); - psrldq(xmm0, 4); - ReadTexel(xmm7, xmm3, 0); - psrldq(xmm3, 4); - - punpckldq(xmm1, xmm2); - punpckldq(xmm5, xmm7); - - mov(ebx, ptr[&lod_i->u32[2]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + 
mip_offset]); - - ReadTexel(xmm2, xmm0, 0); - psrldq(xmm0, 4); - ReadTexel(xmm7, xmm3, 0); - psrldq(xmm3, 4); - - mov(ebx, ptr[&lod_i->u32[3]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm0, xmm0, 0); - ReadTexel(xmm3, xmm3, 0); - - punpckldq(xmm2, xmm0); - punpckldq(xmm7, xmm3); - - punpcklqdq(xmm1, xmm2); - punpcklqdq(xmm5, xmm7); - - movdqa(xmm7, ptr[&m_local.temp.test]); - } - else - { - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm6, xmm5, 0); - psrldq(xmm5, 4); // shuffle instead? (1 2 3 0 ~ rotation) - - mov(ebx, ptr[&lod_i->u32[1]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm1, xmm5, 0); - psrldq(xmm5, 4); - - punpckldq(xmm6, xmm1); - - mov(ebx, ptr[&lod_i->u32[2]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm1, xmm5, 0); - psrldq(xmm5, 4); - - mov(ebx, ptr[&lod_i->u32[3]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - - ReadTexel(xmm4, xmm5, 0); - // psrldq(xmm5, 4); - - punpckldq(xmm1, xmm4); - - punpcklqdq(xmm6, xmm1); - } - - #endif - } - else - { - if(m_sel.mmin && m_sel.lcm) - { - mov(ebx, ptr[&lod_i->u32[0]]); - mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - } - - const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - - #if _M_SSE >= 0x401 - - for(int i = 0; i < pixels; i++) - { - for(int j = 0; j < 4; j++) - { - ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); - } - } - - #else - - const int t[] = {1, 4, 1, 5, 2, 5, 2, 0}; - - for(int i = 0; i < pixels; i++) - { - const Xmm& addr = Xmm(r[i * 2 + 0]); - const Xmm& dst = Xmm(r[i * 2 + 1]); - const Xmm& temp1 = Xmm(t[i * 2 + 0]); - const Xmm& temp2 = Xmm(t[i * 2 + 1]); - - ReadTexel(dst, addr, 0); - psrldq(addr, 4); // shuffle instead? 
(1 2 3 0 ~ rotation) - ReadTexel(temp1, addr, 0); - psrldq(addr, 4); - punpckldq(dst, temp1); - - ReadTexel(temp1, addr, 0); - psrldq(addr, 4); - ReadTexel(temp2, addr, 0); - // psrldq(addr, 4); - punpckldq(temp1, temp2); - - punpcklqdq(dst, temp1); - } - - #endif - } -} - -void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i) -{ - const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - - #if _M_SSE < 0x401 - - ASSERT(i == 0); - - #endif - - if(i == 0) movd(eax, addr); - else pextrd(eax, addr, i); - - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); - - if(i == 0) movd(dst, src); - else pinsrd(dst, src, i); -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSDrawingContext.cpp b/plugins/GSdx_legacy/GSDrawingContext.cpp deleted file mode 100644 index d9cd94d396..0000000000 --- a/plugins/GSdx_legacy/GSDrawingContext.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* -* Copyright (C) 2007-2009 Gabest -* http://www.gabest.org -* -* This Program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2, or (at your option) -* any later version. -* -* This Program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with GNU Make; see the file COPYING. If not, write to -* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
-* http://www.gnu.org/copyleft/gpl.html -* -*/ - -#include "stdafx.h" -#include "GSDrawingContext.h" -#include "GSdx.h" - -static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv) -{ - // return max possible texcoord - - int uv = br; - - if(wm == CLAMP_CLAMP) - { - if(uv > limit) uv = limit; - } - else if(wm == CLAMP_REPEAT) - { - if(tl < 0) uv = limit; // wrap around - else if(uv > limit) uv = limit; - } - else if(wm == CLAMP_REGION_CLAMP) - { - if(uv < minuv) uv = minuv; - if(uv > maxuv) uv = maxuv; - } - else if(wm == CLAMP_REGION_REPEAT) - { - if(tl < 0) uv = minuv | maxuv; // wrap around, just use (any & mask) | fix - else uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask) - } - - return uv; -} - -static int reduce(int uv, int size) -{ - while(size > 3 && (1 << (size - 1)) >= uv + 1) - { - size--; - } - - return size; -} - -static int extend(int uv, int size) -{ - while(size < 10 && (1 << size) < uv + 1) - { - size++; - } - - return size; -} - -GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap) -{ - if(mipmap) return TEX0; // no mipmaping allowed - - // find the optimal value for TW/TH by analyzing vertex trace and clamping values, extending only for region modes where uv may be outside - - int tw = TEX0.TW; - int th = TEX0.TH; - - int wms = (int)CLAMP.WMS; - int wmt = (int)CLAMP.WMT; - - int minu = (int)CLAMP.MINU; - int minv = (int)CLAMP.MINV; - int maxu = (int)CLAMP.MAXU; - int maxv = (int)CLAMP.MAXV; - - GSVector4 uvf = st; - - if(linear) - { - uvf += GSVector4(-0.5f, 0.5f).xxyy(); - } - - GSVector4i uv = GSVector4i(uvf.floor()); - - uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu); - uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv); - - if(tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the 
textue cache and the saved memory is less - { - tw = reduce(uv.x, tw); - th = reduce(uv.y, th); - } - - if(wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT) - { - tw = extend(uv.x, tw); - } - - if(wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT) - { - th = extend(uv.y, th); - } - -#ifdef _DEBUG - if(TEX0.TW != tw || TEX0.TH != th) - { - printf("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)\n", - (int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM, - (int)TEX0.TW, tw, (int)TEX0.TH, th, - uvf.x, uvf.y, uvf.z, uvf.w, - uv.x, uv.y, - wms, wmt, minu, maxu, minv, maxv); - } -#endif - - GIFRegTEX0 res = TEX0; - - res.TW = tw; - res.TH = th; - - return res; -} diff --git a/plugins/GSdx_legacy/GSDrawingContext.h b/plugins/GSdx_legacy/GSDrawingContext.h deleted file mode 100644 index 9ac9f92c09..0000000000 --- a/plugins/GSdx_legacy/GSDrawingContext.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSLocalMemory.h" - -__aligned(class, 32) GSDrawingContext -{ -public: - GIFRegXYOFFSET XYOFFSET; - GIFRegTEX0 TEX0; - GIFRegTEX1 TEX1; - GIFRegTEX2 TEX2; - GIFRegCLAMP CLAMP; - GIFRegMIPTBP1 MIPTBP1; - GIFRegMIPTBP2 MIPTBP2; - GIFRegSCISSOR SCISSOR; - GIFRegALPHA ALPHA; - GIFRegTEST TEST; - GIFRegFBA FBA; - GIFRegFRAME FRAME; - GIFRegZBUF ZBUF; - - struct - { - GSVector4 in; - GSVector4i ex; - GSVector4 ofex; - GSVector4i ofxy; - } scissor; - - struct - { - GSOffset* fb; - GSOffset* zb; - GSOffset* tex; - GSPixelOffset* fzb; - GSPixelOffset4* fzb4; - } offset; - - GSDrawingContext() - { - memset(&offset, 0, sizeof(offset)); - - Reset(); - } - - void Reset() - { - memset(&XYOFFSET, 0, sizeof(XYOFFSET)); - memset(&TEX0, 0, sizeof(TEX0)); - memset(&TEX1, 0, sizeof(TEX1)); - memset(&TEX2, 0, sizeof(TEX2)); - memset(&CLAMP, 0, sizeof(CLAMP)); - memset(&MIPTBP1, 0, sizeof(MIPTBP1)); - memset(&MIPTBP2, 0, sizeof(MIPTBP2)); - memset(&SCISSOR, 0, sizeof(SCISSOR)); - memset(&ALPHA, 0, sizeof(ALPHA)); - memset(&TEST, 0, sizeof(TEST)); - memset(&FBA, 0, sizeof(FBA)); - memset(&FRAME, 0, sizeof(FRAME)); - memset(&ZBUF, 0, sizeof(ZBUF)); - } - - void UpdateScissor() - { - ASSERT(XYOFFSET.OFX <= 0xf800 && XYOFFSET.OFY <= 0xf800); - - scissor.ex.u16[0] = (uint16)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX - 0x8000); - scissor.ex.u16[1] = (uint16)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY - 0x8000); - scissor.ex.u16[2] = (uint16)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX - 0x8000); - scissor.ex.u16[3] = (uint16)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY - 0x8000); - - scissor.ofex = GSVector4( - (int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX), - (int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY), - (int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX), - (int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY)); - - scissor.in = GSVector4( - (int)SCISSOR.SCAX0, - (int)SCISSOR.SCAY0, - (int)SCISSOR.SCAX1 + 1, - (int)SCISSOR.SCAY1 + 1); - - 
scissor.ofxy = GSVector4i( - 0x8000, - 0x8000, - (int)XYOFFSET.OFX - 15, - (int)XYOFFSET.OFY - 15); - } - - bool DepthRead() const - { - return TEST.ZTE && TEST.ZTST >= 2; - } - - bool DepthWrite() const - { - if(TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY) // alpha test, all pixels fail, z buffer is not updated - { - return false; - } - - return ZBUF.ZMSK == 0 && TEST.ZTE != 0; // ZTE == 0 is bug on the real hardware, write is blocked then - } - - GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap); - - void Dump(const std::string& filename) - { - // Append on purpose so env + context are merged into a single file - FILE* fp = fopen(filename.c_str(), "at"); - if (!fp) return; - - fprintf(fp, "XYOFFSET\n" - "\tX:%d\n" - "\tY:%d\n\n" - , XYOFFSET.OFX, XYOFFSET.OFY); - - fprintf(fp, "TEX0\n" - "\tTBP0:0x%x\n" - "\tTBW:%d\n" - "\tPSM:0x%x\n" - "\tTW:%d\n" - "\tTCC:%d\n" - "\tTFX:%d\n" - "\tCBP:0x%x\n" - "\tCPSM:0x%x\n" - "\tCSM:%d\n" - "\tCSA:%d\n" - "\tCLD:%d\n" - "\tTH:%lld\n\n" - , TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, TEX0.TH); - - fprintf(fp, "TEX1\n" - "\tLCM:%d\n" - "\tMXL:%d\n" - "\tMMAG:%d\n" - "\tMMIN:%d\n" - "\tMTBA:%d\n" - "\tL:%d\n" - "\tK:%d\n\n" - , TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K); - - fprintf(fp, "TEX2\n" - "\tPSM:0x%x\n" - "\tCBP:0x%x\n" - "\tCPSM:0x%x\n" - "\tCSM:%d\n" - "\tCSA:%d\n" - "\tCLD:%d\n\n" - , TEX2.PSM, TEX2.CBP, TEX2.CPSM, TEX2.CSM, TEX2.CSA, TEX2.CLD); - - fprintf(fp, "CLAMP\n" - "\tWMS:%d\n" - "\tWMT:%d\n" - "\tMINU:%d\n" - "\tMAXU:%d\n" - "\tMAXV:%d\n" - "\tMINV:%lld\n\n" - , CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, CLAMP.MINV); - - // TODO mimmap? 
(yes I'm lazy) - fprintf(fp, "SCISSOR\n" - "\tX0:%d\n" - "\tX1:%d\n" - "\tY0:%d\n" - "\tY1:%d\n\n" - , SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1); - - fprintf(fp, "ALPHA\n" - "\tA:%d\n" - "\tB:%d\n" - "\tC:%d\n" - "\tD:%d\n" - "\tFIX:%d\n" - , ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", "Af"}; - fprintf(fp, "\t=> (%s - %s) * %s + %s\n\n", col[ALPHA.A], col[ALPHA.B], alpha[ALPHA.C], col[ALPHA.D]); - - fprintf(fp, "TEST\n" - "\tATE:%d\n" - "\tATST:%d\n" - "\tAREF:%d\n" - "\tAFAIL:%d\n" - "\tDATE:%d\n" - "\tDATM:%d\n" - "\tZTE:%d\n" - "\tZTST:%d\n\n" - , TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST); - - fprintf(fp, "FBA\n" - "\tFBA:%d\n\n" - , FBA.FBA); - - fprintf(fp, "FRAME\n" - "\tFBP (*32):0x%x\n" - "\tFBW:%d\n" - "\tPSM:0x%x\n" - "\tFBMSK:0x%x\n\n" - , FRAME.FBP*32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK); - - fprintf(fp, "ZBUF\n" - "\tZBP (*32):0x%x\n" - "\tPSM:0x%x\n" - "\tZMSK:%d\n\n" - , ZBUF.ZBP*32, ZBUF.PSM, ZBUF.ZMSK); - - fclose(fp); - } -}; diff --git a/plugins/GSdx_legacy/GSDrawingEnvironment.h b/plugins/GSdx_legacy/GSDrawingEnvironment.h deleted file mode 100644 index 07c6d4eab2..0000000000 --- a/plugins/GSdx_legacy/GSDrawingEnvironment.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" - -__aligned(class, 32) GSDrawingEnvironment -{ -public: - GIFRegPRIM PRIM; - GIFRegPRMODE PRMODE; - GIFRegPRMODECONT PRMODECONT; - GIFRegTEXCLUT TEXCLUT; - GIFRegSCANMSK SCANMSK; - GIFRegTEXA TEXA; - GIFRegFOGCOL FOGCOL; - GIFRegDIMX DIMX; - GIFRegDTHE DTHE; - GIFRegCOLCLAMP COLCLAMP; - GIFRegPABE PABE; - GIFRegBITBLTBUF BITBLTBUF; - GIFRegTRXDIR TRXDIR; - GIFRegTRXPOS TRXPOS; - GIFRegTRXREG TRXREG; - GSDrawingContext CTXT[2]; - - GSDrawingEnvironment() - { - } - - void Reset() - { - memset(&PRIM, 0, sizeof(PRIM)); - memset(&PRMODE, 0, sizeof(PRMODE)); - memset(&PRMODECONT, 0, sizeof(PRMODECONT)); - memset(&TEXCLUT, 0, sizeof(TEXCLUT)); - memset(&SCANMSK, 0, sizeof(SCANMSK)); - memset(&TEXA, 0, sizeof(TEXA)); - memset(&FOGCOL, 0, sizeof(FOGCOL)); - memset(&DIMX, 0, sizeof(DIMX)); - memset(&DTHE, 0, sizeof(DTHE)); - memset(&COLCLAMP, 0, sizeof(COLCLAMP)); - memset(&PABE, 0, sizeof(PABE)); - memset(&BITBLTBUF, 0, sizeof(BITBLTBUF)); - memset(&TRXDIR, 0, sizeof(TRXDIR)); - memset(&TRXPOS, 0, sizeof(TRXPOS)); - memset(&TRXREG, 0, sizeof(TRXREG)); - - CTXT[0].Reset(); - CTXT[1].Reset(); - - memset(dimx, 0, sizeof(dimx)); - } - - GSVector4i dimx[8]; - - void UpdateDIMX() - { - dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0); - dimx[0] = dimx[1].xxzzlh(); - dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0), - dimx[2] = dimx[3].xxzzlh(); - dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0), - dimx[4] = dimx[5].xxzzlh(); - dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0), - dimx[6] = dimx[7].xxzzlh(); - } - - void Dump(const std::string& 
filename) - { - FILE* fp = fopen(filename.c_str(), "wt"); - if (!fp) return; - - fprintf(fp, "PRIM\n" - "\tPRIM:%d\n" - "\tIIP:%d\n" - "\tTME:%d\n" - "\tFGE:%d\n" - "\tABE:%d\n" - "\tAA1:%d\n" - "\tFST:%d\n" - "\tCTXT:%d\n" - "\tFIX:%d\n\n" - , PRIM.PRIM, PRIM.IIP, PRIM.TME, PRIM.FGE, PRIM.ABE, PRIM.AA1, PRIM.FST, PRIM.CTXT, PRIM.FIX); - - fprintf(fp, "PRMODE (when AC=0)\n" - "\t_PRIM:%d\n" - "\tIIP:%d\n" - "\tTME:%d\n" - "\tFGE:%d\n" - "\tABE:%d\n" - "\tAA1:%d\n" - "\tFST:%d\n" - "\tCTXT:%d\n" - "\tFIX:%d\n\n" - , PRMODE._PRIM, PRMODE.IIP, PRMODE.TME, PRMODE.FGE, PRMODE.ABE, PRMODE.AA1, PRMODE.FST, PRMODE.CTXT, PRMODE.FIX); - - fprintf(fp, "PRMODECONT\n" - "\tAC:%d\n\n" - , PRMODECONT.AC); - - fprintf(fp, "TEXCLUT\n" - "\tCOU:%d\n" - "\tCBW:%d\n" - "\tCOV:%d\n\n" - , TEXCLUT.COU, TEXCLUT.CBW, TEXCLUT.COV); - - fprintf(fp, "SCANMSK\n" - "\tMSK:%d\n\n" - "\n" - , SCANMSK.MSK); - - fprintf(fp, "TEXA\n" - "\tAEM:%d\n" - "\tTA0:%d\n" - "\tTA1:%d\n\n" - , TEXA.AEM, TEXA.TA0, TEXA.TA1); - - fprintf(fp, "FOGCOL\n" - "\tFCG:%d\n" - "\tFCB:%d\n" - "\tFCR:%d\n\n" - , FOGCOL.FCG, FOGCOL.FCB, FOGCOL.FCR); - - fprintf(fp, "DIMX\n" - "\tDM22:%d\n" - "\tDM23:%d\n" - "\tDM31:%d\n" - "\tDM02:%d\n" - "\tDM21:%d\n" - "\tDM12:%d\n" - "\tDM03:%d\n" - "\tDM01:%d\n" - "\tDM33:%d\n" - "\tDM30:%d\n" - "\tDM11:%d\n" - "\tDM10:%d\n" - "\tDM20:%d\n" - "\tDM32:%d\n" - "\tDM00:%d\n" - "\tDM13:%d\n\n" - , DIMX.DM22, DIMX.DM23, DIMX.DM31, DIMX.DM02, DIMX.DM21, DIMX.DM12, DIMX.DM03, DIMX.DM01, DIMX.DM33, DIMX.DM30, DIMX.DM11, DIMX.DM10, DIMX.DM20, DIMX.DM32, DIMX.DM00, DIMX.DM13); - - fprintf(fp, "DTHE\n" - "\tDTHE:%d\n\n" - , DTHE.DTHE); - - fprintf(fp, "COLCLAMP\n" - "\tCLAMP:%d\n\n" - , COLCLAMP.CLAMP); - - fprintf(fp, "PABE\n" - "\tPABE:%d\n\n" - , PABE.PABE); - - fprintf(fp, "BITBLTBUF\n" - "\tSBW:%d\n" - "\tSBP:0x%x\n" - "\tSPSM:%d\n" - "\tDBW:%d\n" - "\tDPSM:%d\n" - "\tDBP:0x%x\n\n" - , BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP); - - 
fprintf(fp, "TRXDIR\n" - "\tXDIR:%d\n\n" - , TRXDIR.XDIR); - - fprintf(fp, "TRXPOS\n" - "\tDIRY:%d\n" - "\tSSAY:%d\n" - "\tSSAX:%d\n" - "\tDIRX:%d\n" - "\tDSAX:%d\n" - "\tDSAY:%d\n\n" - , TRXPOS.DIRY, TRXPOS.SSAY, TRXPOS.SSAX, TRXPOS.DIRX, TRXPOS.DSAX, TRXPOS.DSAY); - - fprintf(fp, "TRXREG\n" - "\tRRH:%d\n" - "\tRRW:%d\n\n" - , TRXREG.RRH, TRXREG.RRW); - - fclose(fp); - } - -}; diff --git a/plugins/GSdx_legacy/GSDump.cpp b/plugins/GSdx_legacy/GSDump.cpp deleted file mode 100644 index 973fe6acb8..0000000000 --- a/plugins/GSdx_legacy/GSDump.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDump.h" - -GSDump::GSDump() - : m_gs(NULL) - , m_frames(0) - , m_extra_frames(0) -{ -} - -GSDump::~GSDump() -{ - Close(); -} - -void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs) -{ - m_gs = fopen((fn + ".gs").c_str(), "wb"); - - m_frames = 0; - m_extra_frames = 2; - - if(m_gs) - { - fwrite(&crc, 4, 1, m_gs); - fwrite(&fd.size, 4, 1, m_gs); - fwrite(fd.data, fd.size, 1, m_gs); - fwrite(regs, sizeof(*regs), 1, m_gs); - } -} - -void GSDump::Close() -{ - if(m_gs) {fclose(m_gs); m_gs = NULL;} -} - -void GSDump::Transfer(int index, const uint8* mem, size_t size) -{ - if(m_gs && size > 0) - { - fputc(0, m_gs); - fputc(index, m_gs); - fwrite(&size, 4, 1, m_gs); - fwrite(mem, size, 1, m_gs); - } -} - -void GSDump::ReadFIFO(uint32 size) -{ - if(m_gs && size > 0) - { - fputc(2, m_gs); - fwrite(&size, 4, 1, m_gs); - } -} - -void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs) -{ - if(m_gs) - { - fputc(3, m_gs); - fwrite(regs, sizeof(*regs), 1, m_gs); - - fputc(1, m_gs); - fputc(field, m_gs); - - if((++m_frames & 1) == 0 && last && (m_extra_frames <= 0)) - { - Close(); - } else if (last) { - m_extra_frames--; - } - } -} diff --git a/plugins/GSdx_legacy/GSDump.h b/plugins/GSdx_legacy/GSDump.h deleted file mode 100644 index d8e31262b0..0000000000 --- a/plugins/GSdx_legacy/GSDump.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSVertexSW.h" - -/* - -Dump file format: -- [crc/4] [state size/4] [state data/size] [PMODE/0x2000] [id/1] [data/?] .. [id/1] [data/?] - -Transfer data (id == 0) -- [0/1] [path index/1] [size/4] [data/size] - -VSync data (id == 1) -- [1/1] [field/1] - -ReadFIFO2 data (id == 2) -- [2/1] [size/?] - -Regs data (id == 3) -- [PMODE/0x2000] - -*/ - -class GSDump -{ - FILE* m_gs; - int m_frames; - int m_extra_frames; - -public: - GSDump(); - virtual ~GSDump(); - - void Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs); - void Close(); - void ReadFIFO(uint32 size); - void Transfer(int index, const uint8* mem, size_t size); - void VSync(int field, bool last, const GSPrivRegSet* regs); - operator bool() {return m_gs != NULL;} -}; diff --git a/plugins/GSdx_legacy/GSFunctionMap.cpp b/plugins/GSdx_legacy/GSFunctionMap.cpp deleted file mode 100644 index 1b1974c0ea..0000000000 --- a/plugins/GSdx_legacy/GSFunctionMap.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSFunctionMap.h" diff --git a/plugins/GSdx_legacy/GSFunctionMap.h b/plugins/GSdx_legacy/GSFunctionMap.h deleted file mode 100644 index 3876679cc3..0000000000 --- a/plugins/GSdx_legacy/GSFunctionMap.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSCodeBuffer.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" - -template class GSFunctionMap -{ -protected: - struct ActivePtr - { - uint64 frame, frames; - uint64 ticks, actual, total; - VALUE f; - }; - - hash_map m_map; - hash_map m_map_active; - - ActivePtr* m_active; - - virtual VALUE GetDefaultFunction(KEY key) = 0; - -public: - GSFunctionMap() - : m_active(NULL) - { - } - - virtual ~GSFunctionMap() - { - for_each(m_map_active.begin(), m_map_active.end(), delete_second()); - } - - VALUE operator [] (KEY key) - { - m_active = NULL; - - typename hash_map::iterator i = m_map_active.find(key); - - if(i != m_map_active.end()) - { - m_active = i->second; - } - else - { - typename hash_map::iterator i = m_map.find(key); - - ActivePtr* p = new ActivePtr(); - - memset(p, 0, sizeof(*p)); - - p->frame = (uint64)-1; - - p->f = i != m_map.end() ? i->second : GetDefaultFunction(key); - - m_map_active[key] = p; - - m_active = p; - } - - return m_active->f; - } - - void UpdateStats(uint64 frame, uint64 ticks, int actual, int total) - { - if(m_active) - { - if(m_active->frame != frame) - { - m_active->frame = frame; - m_active->frames++; - } - - m_active->ticks += ticks; - m_active->actual += actual; - m_active->total += total; - - ASSERT(m_active->total >= m_active->actual); - } - } - - virtual void PrintStats() - { - uint64 ttpf = 0; - - typename hash_map::iterator i; - - for(i = m_map_active.begin(); i != m_map_active.end(); i++) - { - ActivePtr* p = i->second; - - if(p->frames) - { - ttpf += p->ticks / p->frames; - } - } - - printf("GS stats\n"); - - for(i = m_map_active.begin(); i != m_map_active.end(); i++) - { - KEY key = i->first; - ActivePtr* p = i->second; - - if(p->frames && ttpf) - { - uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0; - uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0; - uint64 ppf = p->frames > 0 ? 
p->actual / p->frames : 0; - - printf("[%014llx]%c %6.2f%% %5.2f%% f %4lld t %12lld p %12lld w %12lld tpp %4lld tpf %9lld ppf %9lld\n", - (uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ', - (float)(tpf * 10000 / 34000000) / 100, - (float)(tpf * 10000 / ttpf) / 100, - p->frames, p->ticks, p->actual, p->total - p->actual, - tpp, tpf, ppf); - } - } - } -}; - -class GSCodeGenerator : public Xbyak::CodeGenerator -{ -protected: - Xbyak::util::Cpu m_cpu; - -public: - GSCodeGenerator(void* code, size_t maxsize) - : Xbyak::CodeGenerator(maxsize, code) - { - } -}; - -template -class GSCodeGeneratorFunctionMap : public GSFunctionMap -{ - string m_name; - void* m_param; - hash_map m_cgmap; - GSCodeBuffer m_cb; - - enum {MAX_SIZE = 8192}; - -public: - GSCodeGeneratorFunctionMap(const char* name, void* param) - : m_name(name) - , m_param(param) - { - } - - VALUE GetDefaultFunction(KEY key) - { - VALUE ret = NULL; - - typename hash_map::iterator i = m_cgmap.find(key); - - if(i != m_cgmap.end()) - { - ret = i->second; - } - else - { - CG* cg = new CG(m_param, key, m_cb.GetBuffer(MAX_SIZE), MAX_SIZE); - - ASSERT(cg->getSize() < MAX_SIZE); - - m_cb.ReleaseBuffer(cg->getSize()); - - ret = (VALUE)cg->getCode(); - - m_cgmap[key] = ret; - - #ifdef ENABLE_VTUNE - - // vtune method registration - - // if(iJIT_IsProfilingActive()) // always > 0 - { - string name = format("%s<%016llx>()", m_name.c_str(), (uint64)key); - - iJIT_Method_Load ml; - - memset(&ml, 0, sizeof(ml)); - - ml.method_id = iJIT_GetNewMethodID(); - ml.method_name = (char*)name.c_str(); - ml.method_load_address = (void*)cg->getCode(); - ml.method_size = (unsigned int)cg->getSize(); - - iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml); -/* - name = format("c:/temp1/%s_%016llx.bin", m_name.c_str(), (uint64)key); - - if(FILE* fp = fopen(name.c_str(), "wb")) - { - fputc(0x0F, fp); fputc(0x0B, fp); - fputc(0xBB, fp); fputc(0x6F, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp); - fputc(0x64, fp); 
fputc(0x67, fp); fputc(0x90, fp); - - fwrite(cg->getCode(), cg->getSize(), 1, fp); - - fputc(0xBB, fp); fputc(0xDE, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp); - fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp); - fputc(0x0F, fp); fputc(0x0B, fp); - - fclose(fp); - } -*/ - } - - #endif - - delete cg; - } - - return ret; - } -}; diff --git a/plugins/GSdx_legacy/GSLinuxDialog.cpp b/plugins/GSdx_legacy/GSLinuxDialog.cpp deleted file mode 100644 index 369abe0c3c..0000000000 --- a/plugins/GSdx_legacy/GSLinuxDialog.cpp +++ /dev/null @@ -1,520 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include -#include "GS.h" -#include "GSdx.h" -#include "GSLinuxLogo.h" -#include "GSSetting.h" - -void AddTooltip(GtkWidget* w, int idc) -{ - gtk_widget_set_tooltip_text(w, dialog_message(idc)); -} - -void AddTooltip(GtkWidget* w1, GtkWidget* w2, int idc) -{ - AddTooltip(w1, idc); - AddTooltip(w2, idc); -} - -GtkWidget* left_label(const char* lbl) -{ - GtkWidget* w = gtk_label_new(lbl); -#if GTK_MAJOR_VERSION >= 3 - gtk_widget_set_halign(w, GTK_ALIGN_START); -#else - gtk_misc_set_alignment(GTK_MISC(w),0.0,0.5); -#endif - return w; -} - -void CB_ChangedComboBox(GtkComboBox *combo, gpointer user_data) -{ - int p = gtk_combo_box_get_active(combo); - vector* s = (vector*)g_object_get_data(G_OBJECT(combo), "Settings"); - - try { - theApp.SetConfig((char*)user_data, s->at(p).value); - } catch (...) { - } -} - -GtkWidget* CreateComboBoxFromVector(const vector& s, const char* opt_name, int32_t opt_default = 0) -{ - GtkWidget* combo_box = gtk_combo_box_text_new(); - int32_t opt_value = theApp.GetConfig(opt_name, opt_default); - int opt_position = 0; - - for(size_t i = 0; i < s.size(); i++) - { - string label = s[i].name; - - if(!s[i].note.empty()) label += format(" (%s)", s[i].note.c_str()); - - gtk_combo_box_text_append_text(GTK_COMBO_BOX_TEXT(combo_box), label.c_str()); - - if (s[i].value == opt_value) - opt_position = i; - } - - gtk_combo_box_set_active(GTK_COMBO_BOX(combo_box), opt_position); - - g_signal_connect(combo_box, "changed", G_CALLBACK(CB_ChangedComboBox), const_cast(opt_name)); - g_object_set_data(G_OBJECT(combo_box), "Settings", (void*)&s); - - return combo_box; -} - -void CB_PreEntryActived(GtkEntry *entry, gchar* preedit, gpointer user_data) -{ - int hex_value = 0; - sscanf(preedit,"%X",&hex_value); - - theApp.SetConfig((char*)user_data, hex_value); -} - -void CB_EntryActived(GtkEntry *entry, gpointer user_data) -{ - int hex_value = 0; - const gchar *data = gtk_entry_get_text(entry); 
- sscanf(data,"%X",&hex_value); - - theApp.SetConfig((char*)user_data, hex_value); -} - -GtkWidget* CreateTextBox(const char* opt_name, int opt_default = 0) { - GtkWidget* entry = gtk_entry_new(); - - int hex_value = theApp.GetConfig(opt_name, opt_default); - - gchar* data=(gchar *)g_malloc(sizeof(gchar)*40); - sprintf(data,"%X", hex_value); - gtk_entry_set_text(GTK_ENTRY(entry),data); - g_free(data); - - g_signal_connect(entry, "activate", G_CALLBACK(CB_EntryActived), const_cast(opt_name)); - // Note it doesn't seem to work as expected - g_signal_connect(entry, "preedit-changed", G_CALLBACK(CB_PreEntryActived), const_cast(opt_name)); - - return entry; -} - -void CB_ToggleCheckBox(GtkToggleButton *togglebutton, gpointer user_data) -{ - theApp.SetConfig((char*)user_data, (int)gtk_toggle_button_get_active(togglebutton)); -} - -GtkWidget* CreateCheckBox(const char* label, const char* opt_name, bool opt_default = false) -{ - GtkWidget* check = gtk_check_button_new_with_label(label); - - gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(check), theApp.GetConfig(opt_name, opt_default)); - - g_signal_connect(check, "toggled", G_CALLBACK(CB_ToggleCheckBox), const_cast(opt_name)); - - return check; -} - -void CB_SpinButton(GtkSpinButton *spin, gpointer user_data) -{ - theApp.SetConfig((char*)user_data, (int)gtk_spin_button_get_value(spin)); -} - -GtkWidget* CreateSpinButton(double min, double max, const char* opt_name, int opt_default = 0) -{ - GtkWidget* spin = gtk_spin_button_new_with_range(min, max, 1); - - gtk_spin_button_set_value(GTK_SPIN_BUTTON(spin), theApp.GetConfig(opt_name, opt_default)); - - g_signal_connect(spin, "value-changed", G_CALLBACK(CB_SpinButton), const_cast(opt_name)); - - return spin; -} - -void CB_RangeChanged(GtkRange* range, gpointer user_data) -{ - theApp.SetConfig((char*)user_data, (int)gtk_range_get_value(range)); -} - -GtkWidget* CreateScale(const char* opt_name, int opt_default = 0) -{ -#if GTK_MAJOR_VERSION < 3 - GtkWidget* scale = 
gtk_hscale_new_with_range(0, 200, 10); -#else - GtkWidget* scale = gtk_scale_new_with_range(GTK_ORIENTATION_HORIZONTAL, 0, 200, 10); -#endif - - gtk_scale_set_value_pos(GTK_SCALE(scale), GTK_POS_RIGHT); - gtk_range_set_value(GTK_RANGE(scale), theApp.GetConfig(opt_name, opt_default)); - - g_signal_connect(scale, "value-changed", G_CALLBACK(CB_RangeChanged), const_cast(opt_name)); - - return scale; -} - -void CB_PickFile(GtkFileChooserButton *chooser, gpointer user_data) -{ - theApp.SetConfig((char*)user_data, gtk_file_chooser_get_filename(GTK_FILE_CHOOSER(chooser))); -} - -GtkWidget* CreateFileChooser(GtkFileChooserAction action, const char* label, const char* opt_name, const char* opt_default) -{ - GtkWidget* chooser = gtk_file_chooser_button_new(label, action); - - gtk_file_chooser_set_filename(GTK_FILE_CHOOSER(chooser), theApp.GetConfig(opt_name, opt_default).c_str()); - - g_signal_connect(chooser, "file-set", G_CALLBACK(CB_PickFile), const_cast(opt_name)); - - return chooser; -} - -static int s_table_line = 0; -static void InsertWidgetInTable(GtkWidget* table, GtkWidget *left, GtkWidget *right = NULL, GtkWidget *third = NULL) { - GtkAttachOptions opt = (GtkAttachOptions)(GTK_EXPAND | GTK_FILL); // default - guint l_xpad = GTK_IS_CHECK_BUTTON(left) ? 
0 : 22; - guint r_xpad = 0; - guint ypad = 0; - if (!left) { - gtk_table_attach(GTK_TABLE(table), right, 1, 2, s_table_line, s_table_line+1, opt, opt, r_xpad, ypad); - } else if (!right) { - gtk_table_attach(GTK_TABLE(table), left, 0, 1, s_table_line, s_table_line+1, opt, opt, l_xpad, ypad); - } else if (right == left) { - gtk_table_attach(GTK_TABLE(table), left, 0, 2, s_table_line, s_table_line+1, opt, opt, r_xpad, ypad); - } else { - gtk_table_attach(GTK_TABLE(table), left, 0, 1, s_table_line, s_table_line+1, opt, opt, l_xpad, ypad); - gtk_table_attach(GTK_TABLE(table), right, 1, 2, s_table_line, s_table_line+1, opt, opt, r_xpad, ypad); - } - if (third) { - gtk_table_attach(GTK_TABLE(table), third, 2, 3, s_table_line, s_table_line+1, opt, opt, r_xpad, ypad); - } - s_table_line++; -} - -GtkWidget* CreateTableInBox(GtkWidget* parent_box, const char* frame_title, int row, int col) { - GtkWidget* table = gtk_table_new(row, col, false); - GtkWidget* container = (frame_title) ? gtk_frame_new (frame_title) : gtk_vbox_new(false, 5); - gtk_container_add(GTK_CONTAINER(container), table); - gtk_container_add(GTK_CONTAINER(parent_box), container); - - return table; -} - -void populate_hw_table(GtkWidget* hw_table) -{ - GtkWidget* filter_label = left_label("Texture Filtering:"); - GtkWidget* filter_combo_box = CreateComboBoxFromVector(theApp.m_gs_filter, "filter", 2); - - GtkWidget* fsaa_label = left_label("Internal Resolution:"); - GtkWidget* fsaa_combo_box = CreateComboBoxFromVector(theApp.m_gs_upscale_multiplier, "upscale_multiplier", 1); - - GtkWidget* af_label = left_label("Anisotropic Filtering:"); - GtkWidget* af_combo_box = CreateComboBoxFromVector(theApp.m_gs_max_anisotropy, "MaxAnisotropy", 0); - - GtkWidget* crc_label = left_label("Automatic CRC level:"); - GtkWidget* crc_combo_box = CreateComboBoxFromVector(theApp.m_gs_crc_level, "crc_hack_level", 3); - - GtkWidget* paltex_check = CreateCheckBox("Allow 8 bits textures", "paltex"); - GtkWidget* acc_date_check = 
CreateCheckBox("Accurate Date", "accurate_date", false); - GtkWidget* tc_depth_check = CreateCheckBox("Full Depth Emulation", "texture_cache_depth", true); - - GtkWidget* acc_bld_label = left_label("Blending Unit Accuracy:"); - GtkWidget* acc_bld_combo_box = CreateComboBoxFromVector(theApp.m_gs_acc_blend_level, "accurate_blending_unit", 1); - - // Some helper string - AddTooltip(paltex_check, IDC_PALTEX); - AddTooltip(acc_date_check, IDC_ACCURATE_DATE); - AddTooltip(crc_label, crc_combo_box, IDC_CRC_LEVEL); - AddTooltip(acc_bld_label, acc_bld_combo_box, IDC_ACCURATE_BLEND_UNIT); - AddTooltip(tc_depth_check, IDC_TC_DEPTH); - AddTooltip(filter_label, filter_combo_box, IDC_FILTER); - AddTooltip(af_label, af_combo_box, IDC_AFCOMBO); - - s_table_line = 0; - InsertWidgetInTable(hw_table, paltex_check, tc_depth_check); - InsertWidgetInTable(hw_table, acc_date_check); - InsertWidgetInTable(hw_table, fsaa_label, fsaa_combo_box); - InsertWidgetInTable(hw_table, filter_label, filter_combo_box); - InsertWidgetInTable(hw_table, af_label, af_combo_box); - InsertWidgetInTable(hw_table, acc_bld_label, acc_bld_combo_box); - InsertWidgetInTable(hw_table, crc_label, crc_combo_box); -} - -void populate_gl_table(GtkWidget* gl_table) -{ - GtkWidget* gl_bs_label = left_label("Buffer Storage:"); - GtkWidget* gl_bs_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_GL_ARB_buffer_storage", -1); - GtkWidget* gl_sso_label = left_label("Separate Shader:"); - GtkWidget* gl_sso_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_GL_ARB_separate_shader_objects", -1); - GtkWidget* gl_gs_label = left_label("Geometry Shader:"); - GtkWidget* gl_gs_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_geometry_shader", -1); - GtkWidget* gl_ils_label = left_label("Image Load Store:"); - GtkWidget* gl_ils_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_GL_ARB_shader_image_load_store", -1); - GtkWidget* gl_cc_label = left_label("Clip Control (depth 
accuracy):"); - GtkWidget* gl_cc_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_GL_ARB_clip_control", -1); - GtkWidget* gl_tb_label = left_label("Texture Barrier:"); - GtkWidget* gl_tb_combo = CreateComboBoxFromVector(theApp.m_gs_gl_ext, "override_GL_ARB_texture_barrier", -1); - - s_table_line = 0; - InsertWidgetInTable(gl_table , gl_gs_label , gl_gs_combo); - InsertWidgetInTable(gl_table , gl_bs_label , gl_bs_combo); - InsertWidgetInTable(gl_table , gl_sso_label , gl_sso_combo); - InsertWidgetInTable(gl_table , gl_ils_label , gl_ils_combo); - InsertWidgetInTable(gl_table , gl_cc_label , gl_cc_combo); - InsertWidgetInTable(gl_table , gl_tb_label , gl_tb_combo); -} - -void populate_sw_table(GtkWidget* sw_table) -{ - GtkWidget* threads_label = left_label("Extra rendering threads:"); - GtkWidget* threads_spin = CreateSpinButton(0, 32, "extrathreads", DEFAULT_EXTRA_RENDERING_THREADS); - - GtkWidget* aa_check = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1"); - GtkWidget* mipmap_check = CreateCheckBox("Mipmap", "mipmap", true); - - AddTooltip(aa_check, IDC_AA1); - AddTooltip(mipmap_check, IDC_MIPMAP); - AddTooltip(threads_label, threads_spin, IDC_SWTHREADS); - - s_table_line = 0; - InsertWidgetInTable(sw_table , threads_label , threads_spin); - InsertWidgetInTable(sw_table , aa_check, mipmap_check); -} - -void populate_shader_table(GtkWidget* shader_table) -{ - GtkWidget* shader = CreateFileChooser(GTK_FILE_CHOOSER_ACTION_OPEN, "Select an external shader", "shaderfx_glsl", "dummy.glsl"); - GtkWidget* shader_conf = CreateFileChooser(GTK_FILE_CHOOSER_ACTION_OPEN, "Then select a config", "shaderfx_conf", "dummy.ini"); - GtkWidget* shader_label = left_label("External shader glsl"); - GtkWidget* shader_conf_label = left_label("External shader conf"); - - GtkWidget* shadeboost_check = CreateCheckBox("Shade boost", "ShadeBoost"); - GtkWidget* fxaa_check = CreateCheckBox("Fxaa shader", "fxaa"); - GtkWidget* shaderfx_check = CreateCheckBox("External shader", 
"shaderfx"); - - GtkWidget* tv_shader_label = left_label("TV shader:"); - GtkWidget* tv_shader = CreateComboBoxFromVector(theApp.m_gs_tv_shaders, "TVShader"); - - // Shadeboost scale - GtkWidget* sb_brightness = CreateScale("ShadeBoost_Brightness", 50); - GtkWidget* sb_brightness_label = left_label("Shade Boost Brightness:"); - - GtkWidget* sb_contrast = CreateScale("ShadeBoost_Contrast", 50); - GtkWidget* sb_contrast_label = left_label("Shade Boost Contrast:"); - - GtkWidget* sb_saturation = CreateScale("ShadeBoost_Saturation", 50); - GtkWidget* sb_saturation_label = left_label("Shade Boost Saturation:"); - - AddTooltip(shadeboost_check, IDC_SHADEBOOST); - AddTooltip(shaderfx_check, IDC_SHADER_FX); - AddTooltip(fxaa_check, IDC_FXAA); - - s_table_line = 0; - InsertWidgetInTable(shader_table , fxaa_check); - InsertWidgetInTable(shader_table , shadeboost_check); - InsertWidgetInTable(shader_table , sb_brightness_label , sb_brightness); - InsertWidgetInTable(shader_table , sb_contrast_label , sb_contrast); - InsertWidgetInTable(shader_table , sb_saturation_label , sb_saturation); - InsertWidgetInTable(shader_table , shaderfx_check); - InsertWidgetInTable(shader_table , shader_label , shader); - InsertWidgetInTable(shader_table , shader_conf_label , shader_conf); - InsertWidgetInTable(shader_table , tv_shader_label, tv_shader); -} - -void populate_hack_table(GtkWidget* hack_table) -{ - GtkWidget* hack_offset_check = CreateCheckBox("Half-pixel Offset Hack", "UserHacks_HalfPixelOffset"); - GtkWidget* hack_skipdraw_label = left_label("Skipdraw:"); - GtkWidget* hack_skipdraw_spin = CreateSpinButton(0, 1000, "UserHacks_SkipDraw"); - GtkWidget* hack_enble_check = CreateCheckBox("Enable User Hacks", "UserHacks"); - GtkWidget* hack_wild_check = CreateCheckBox("Wild Arms Hack", "UserHacks_WildHack"); - GtkWidget* hack_tco_label = left_label("Texture Offset: 0x"); - GtkWidget* hack_tco_entry = CreateTextBox("UserHacks_TCOffset"); - GtkWidget* align_sprite_check = 
CreateCheckBox("Align sprite hack", "UserHacks_align_sprite_X"); - GtkWidget* preload_gs_check = CreateCheckBox("Preload Frame", "preload_frame_with_gs_data"); - GtkWidget* hack_safe_fbmask = CreateCheckBox("Safe Accurate Blending", "UserHacks_safe_fbmask"); - GtkWidget* hack_fast_inv = CreateCheckBox("Fast Texture Invalidation", "UserHacks_DisablePartialInvalidation"); - - GtkWidget* hack_sprite_box = CreateComboBoxFromVector(theApp.m_gs_hack, "UserHacks_SpriteHack"); - GtkWidget* hack_sprite_label = left_label("Alpha-Sprite Hack:"); - GtkWidget* stretch_hack_box = CreateComboBoxFromVector(theApp.m_gs_hack, "UserHacks_round_sprite_offset"); - GtkWidget* stretch_hack_label = left_label("Align Sprite Texture:"); - - // Reuse windows helper string :) - AddTooltip(hack_offset_check, IDC_OFFSETHACK); - AddTooltip(hack_skipdraw_label, IDC_SKIPDRAWHACK); - AddTooltip(hack_skipdraw_spin, IDC_SKIPDRAWHACK); - gtk_widget_set_tooltip_text(hack_enble_check, "Allows the use of the hack below"); - AddTooltip(hack_wild_check, IDC_WILDHACK); - AddTooltip(hack_sprite_label, hack_sprite_box, IDC_SPRITEHACK); - AddTooltip(hack_tco_label, IDC_TCOFFSETX); - AddTooltip(hack_tco_entry, IDC_TCOFFSETX); - AddTooltip(align_sprite_check, IDC_ALIGN_SPRITE); - AddTooltip(stretch_hack_label, stretch_hack_box, IDC_ROUND_SPRITE); - AddTooltip(preload_gs_check, IDC_PRELOAD_GS); - AddTooltip(hack_safe_fbmask, IDC_SAFE_FBMASK); - AddTooltip(hack_fast_inv, IDC_FAST_TC_INV); - - - s_table_line = 0; - InsertWidgetInTable(hack_table , hack_enble_check); - InsertWidgetInTable(hack_table , hack_wild_check , align_sprite_check); - InsertWidgetInTable(hack_table , hack_offset_check , preload_gs_check); - InsertWidgetInTable(hack_table , hack_safe_fbmask , hack_fast_inv); - InsertWidgetInTable(hack_table , hack_sprite_label , hack_sprite_box ); - InsertWidgetInTable(hack_table , stretch_hack_label , stretch_hack_box ); - InsertWidgetInTable(hack_table , hack_skipdraw_label , hack_skipdraw_spin); - 
InsertWidgetInTable(hack_table , hack_tco_label , hack_tco_entry); -} - -void populate_main_table(GtkWidget* main_table) -{ - GtkWidget* render_label = left_label("Renderer:"); - GtkWidget* render_combo_box = CreateComboBoxFromVector(theApp.m_gs_renderers, "Renderer", static_cast(GSRendererType::Default)); - GtkWidget* interlace_label = left_label("Interlacing (F5):"); - GtkWidget* interlace_combo_box = CreateComboBoxFromVector(theApp.m_gs_interlace, "interlace", 7); - - s_table_line = 0; - InsertWidgetInTable(main_table, render_label, render_combo_box); - InsertWidgetInTable(main_table, interlace_label, interlace_combo_box); -} - -void populate_debug_table(GtkWidget* debug_table) -{ - GtkWidget* glsl_debug_check = CreateCheckBox("GLSL compilation", "debug_glsl_shader"); - GtkWidget* gl_debug_check = CreateCheckBox("Print GL error", "debug_opengl"); - GtkWidget* gs_dump_check = CreateCheckBox("Dump GS data", "dump"); - GtkWidget* gs_save_check = CreateCheckBox("Save RT", "save"); - GtkWidget* gs_savef_check = CreateCheckBox("Save Frame", "savef"); - GtkWidget* gs_savet_check = CreateCheckBox("Save Texture", "savet"); - GtkWidget* gs_savez_check = CreateCheckBox("Save Depth", "savez"); - - GtkWidget* gs_saven_label = left_label("Start of Dump"); - GtkWidget* gs_saven_spin = CreateSpinButton(0, pow(10, 9), "saven"); - GtkWidget* gs_savel_label = left_label("Length of Dump"); - GtkWidget* gs_savel_spin = CreateSpinButton(0, pow(10, 5), "savel"); - - s_table_line = 0; - InsertWidgetInTable(debug_table, gl_debug_check, glsl_debug_check); - InsertWidgetInTable(debug_table, gs_dump_check); - InsertWidgetInTable(debug_table, gs_save_check, gs_savef_check); - InsertWidgetInTable(debug_table, gs_savet_check, gs_savez_check); - InsertWidgetInTable(debug_table, gs_saven_label, gs_saven_spin); - InsertWidgetInTable(debug_table, gs_savel_label, gs_savel_spin); -} - -void populate_record_table(GtkWidget* record_table) -{ - GtkWidget* capture_check = CreateCheckBox("Enable 
Recording (with F12)", "capture_enabled"); - GtkWidget* resxy_label = left_label("Resolution:"); - GtkWidget* resx_spin = CreateSpinButton(256, 8192, "capture_resx", 1280); - GtkWidget* resy_spin = CreateSpinButton(256, 8192, "capture_resy", 1024); - GtkWidget* threads_label = left_label("Saving Threads:"); - GtkWidget* threads_spin = CreateSpinButton(1, 32, "capture_threads", 4); - GtkWidget* out_dir_label = left_label("Output Directory:"); - GtkWidget* out_dir = CreateFileChooser(GTK_FILE_CHOOSER_ACTION_SELECT_FOLDER, "Select a directory", "capture_out_dir", "/tmp"); - GtkWidget* png_label = left_label("PNG Compression Level:"); - GtkWidget* png_level = CreateSpinButton(1, 9, "png_compression_level", 1); - - InsertWidgetInTable(record_table , capture_check); - InsertWidgetInTable(record_table , resxy_label , resx_spin , resy_spin); - InsertWidgetInTable(record_table , threads_label , threads_spin); - InsertWidgetInTable(record_table , png_label , png_level); - InsertWidgetInTable(record_table , out_dir_label , out_dir); -} - -bool RunLinuxDialog() -{ - GtkWidget *dialog; - int return_value; - - /* Create the widgets */ - dialog = gtk_dialog_new_with_buttons ( - "GSdx Config", - NULL, /* parent window*/ - (GtkDialogFlags)(GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT), - "OK", GTK_RESPONSE_ACCEPT, - // "Cancel", GTK_RESPONSE_REJECT, // Drop because it is too annoying to support call back this way - NULL); - - // The main area for the whole dialog box. - GtkWidget* main_box = gtk_vbox_new(false, 5); - GtkWidget* central_box = gtk_vbox_new(false, 5); - GtkWidget* advanced_box = gtk_vbox_new(false, 5); - GtkWidget* debug_box = gtk_vbox_new(false, 5); - - // Grab a logo, to make things look nice. 
- GdkPixbuf* logo_pixmap = gdk_pixbuf_from_pixdata(&gsdx_ogl_logo, false, NULL); - GtkWidget* logo_image = gtk_image_new_from_pixbuf(logo_pixmap); - gtk_box_pack_start(GTK_BOX(main_box), logo_image, true, true, 0); - - GtkWidget* main_table = CreateTableInBox(main_box , NULL , 2 , 2); - - GtkWidget* shader_table = CreateTableInBox(central_box , "Custom Shader Settings" , 9 , 2); - GtkWidget* hw_table = CreateTableInBox(central_box , "Hardware Mode Settings" , 7 , 2); - GtkWidget* sw_table = CreateTableInBox(central_box , "Software Mode Settings" , 2 , 2); - - GtkWidget* hack_table = CreateTableInBox(advanced_box, "Hacks" , 7 , 2); - GtkWidget* gl_table = CreateTableInBox(advanced_box, "OpenGL Very Advanced Custom Settings" , 6 , 2); - - GtkWidget* record_table = CreateTableInBox(debug_box , "Recording Settings" , 4 , 3); - GtkWidget* debug_table = CreateTableInBox(debug_box , "OpenGL / GSdx Debug Settings" , 6 , 3); - - // Populate all the tables - populate_main_table(main_table); - - populate_shader_table(shader_table); - populate_hw_table(hw_table); - populate_sw_table(sw_table); - - populate_hack_table(hack_table); - populate_gl_table(gl_table); - - populate_debug_table(debug_table); - populate_record_table(record_table); - - // Handle some nice tab - GtkWidget* notebook = gtk_notebook_new(); - gtk_notebook_append_page(GTK_NOTEBOOK(notebook), central_box , gtk_label_new("Global Settings")); - gtk_notebook_append_page(GTK_NOTEBOOK(notebook), advanced_box, gtk_label_new("Advanced Settings")); - gtk_notebook_append_page(GTK_NOTEBOOK(notebook), debug_box , gtk_label_new("Debug/Recording Settings")); - - // Put everything in the big box. - gtk_container_add(GTK_CONTAINER(main_box), notebook); - - // Put the box in the dialog and show it to the world. 
- gtk_container_add (GTK_CONTAINER(gtk_dialog_get_content_area(GTK_DIALOG(dialog))), main_box); - gtk_widget_show_all (dialog); - return_value = gtk_dialog_run (GTK_DIALOG (dialog)); - - // Compatibility & not supported option - int mode_width = theApp.GetConfig("ModeWidth", 640); - int mode_height = theApp.GetConfig("ModeHeight", 480); - theApp.SetConfig("ModeHeight", mode_height); - theApp.SetConfig("ModeWidth", mode_width); - theApp.SetConfig("msaa", 0); - theApp.SetConfig("windowed", 1); - - gtk_widget_destroy (dialog); - - return (return_value == GTK_RESPONSE_ACCEPT); -} diff --git a/plugins/GSdx_legacy/GSLinuxLogo.h b/plugins/GSdx_legacy/GSLinuxLogo.h deleted file mode 100644 index 1080969147..0000000000 --- a/plugins/GSdx_legacy/GSLinuxLogo.h +++ /dev/null @@ -1,1617 +0,0 @@ -/* - * Generated file - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -/* GdkPixbuf RGB C-Source image dump 1-byte-run-length-encoded */ - -#include - -//gdk-pixbuf-csource res/logo-ogl.bmp --struct >! 
GSLinuxLogo.h - -static const GdkPixdata gsdx_ogl_logo = { - 0x47646b50, /* Pixbuf magic: 'GdkP' */ - 24 + 43884, /* header length + pixel_data length */ - 0x2010001, /* pixdata_type */ - 786, /* rowstride */ - 262, /* width */ - 71, /* height */ - /* pixel_data: */ - (unsigned char*)"\7\22\24\26\22\25\27\22\24\26\21\25\26\22\24\26\22\24\27\22\24\26\202" - "\21\24\26\10\22\25\26\22\24\26\22\25\27\22\24\26\22\25\27\21\24\27\22" - "\24\26\21\24\26\202\22\24\26\1\21\24\26\202\22\24\27\202\22\24\26\202" - "\21\24\26\3\22\24\27\22\24\26\22\25\26\202\21\24\27\4\22\24\27\21\25" - "\27\22\24\27\22\24\26\204\22\24\27\6\21\24\27\22\24\26\22\24\27\22\24" - "\26\21\24\27\22\24\27\202\21\24\26\1\22\24\27\202\22\24\26\36\21\25\26" - "\22\24\26\22\24\27\21\24\26\22\24\26\21\24\26\22\25\26\21\24\27\21\25" - "\26\21\24\26\21\24\27\22\24\26\22\25\27\22\24\27\21\25\26\22\24\27\22" - "\25\27\21\24\27\22\25\26\21\24\26\22\25\26\21\25\27\22\24\27\22\24\26" - "\22\24\27\22\24\26\22\24\27\21\25\26\21\24\26\21\25\27\202\22\24\26\11" - "\22\24\27\21\24\27\22\24\26\21\24\27\22\24\27\22\25\27\22\24\26\21\24" - "\26\21\24\27\202\21\24\26\5\22\24\27\22\24\26\21\24\27\22\24\27\22\25" - "\26\202\22\24\26\202\21\24\26\204\22\24\26\7\22\24\27\21\25\26\22\24" - "\27\21\24\26\22\24\26\21\24\27\22\24\26\202\22\24\27\4\22\24\26\22\24" - "\27\21\25\26\22\24\27\203\22\24\26\203\22\24\27\202\22\25\26\1\22\24" - "\26\202\21\24\26\11\22\24\27\22\24\26\21\25\26\22\24\26\22\24\27\22\24" - "\26\21\24\26\22\24\26\21\24\26\202\21\24\27\203\22\24\26\2\22\25\26\21" - "\24\26\202\22\24\27\11\22\24\26\22\25\26\21\24\27\21\25\26\22\24\26\22" - "\24\27\21\24\26\22\24\27\21\24\27\202\22\24\27\4\21\25\26\22\25\26\22" - "\24\26\22\25\26\202\22\24\27\202\22\24\26\202\22\24\27\2\21\24\27\22" - "\25\26\203\22\24\26\1\21\24\26\202\22\24\26\11\21\24\27\22\25\26\22\24" - "\26\21\24\26\22\24\26\21\24\26\22\25\27\22\25\26\22\24\27\202\22\25\26" - "\21\22\24\27\22\24\26\22\24\27\21\25\27\22\25\27\22\25\26\22\24\26\22" - 
"\24\27\22\25\27\22\24\26\22\24\27\21\24\27\22\24\27\22\25\27\22\24\27" - "\22\24\26\21\25\27\202\22\25\26\203\22\24\26\203\21\24\26\204\22\24\26" - "\4\22\24\27\22\25\26\21\25\27\22\24\26\203\22\24\27\4\22\25\26\22\24" - "\26\21\24\26\21\25\26\204\22\24\27\4\21\24\27\22\24\27\22\24\26\22\25" - "\27\202\21\24\27\2\22\24\26\21\25\26\202\22\24\26\5\21\25\26\21\24\27" - "\22\24\26\22\24\27\21\24\27\202\22\24\26\202\22\24\27\6\22\24\26\22\24" - "\27\21\24\26\21\25\26\22\24\26\22\24\27\203\21\24\27\4\22\25\26\22\24" - "\27\22\25\27\22\24\27\202\21\24\27\2\21\25\27\22\25\30\203\22\25\27\3" - "\22\24\27\22\25\27\21\24\27\202\22\25\27\1\22\24\30\203\22\25\27\3\22" - "\24\27\22\25\30\22\24\27\202\22\25\27\202\22\25\30\204\22\25\27\11\22" - "\24\27\22\25\30\22\25\27\22\24\27\22\25\27\22\25\30\22\25\27\21\25\27" - "\22\25\30\202\22\25\27\203\22\24\30\6\22\25\27\22\24\30\22\25\27\22\24" - "\30\22\25\27\21\25\27\203\22\25\27\4\22\24\27\22\25\30\21\25\27\22\24" - "\27\202\22\25\27\1\22\24\27\202\22\25\27\1\22\24\27\202\22\25\27\202" - "\22\25\30\202\22\25\27\1\22\25\30\202\22\25\27\4\22\24\27\22\25\30\22" - "\25\27\22\25\30\202\22\25\27\1\22\24\27\204\22\25\27\5\22\25\30\22\25" - "\27\21\24\27\22\25\27\22\24\27\202\22\25\27\202\22\24\30\5\22\25\27\22" - "\25\30\21\24\27\22\25\27\22\24\30\202\22\25\27\10\21\25\27\22\25\27\22" - "\25\30\22\25\27\22\24\30\22\24\27\21\25\27\22\25\27\202\21\24\27\1\22" - "\24\27\202\22\25\30\2\22\25\27\22\24\27\202\22\25\27\2\22\24\27\22\24" - "\30\202\22\25\27\2\22\24\27\21\25\27\203\22\25\27\202\22\25\30\205\22" - "\25\27\14\21\25\30\22\24\27\22\25\30\22\24\27\22\25\27\22\24\27\21\25" - "\27\22\25\27\22\25\30\22\24\30\22\25\27\22\24\27\202\22\25\30\13\22\25" - "\27\22\24\27\22\25\30\22\25\27\21\25\27\22\25\27\21\25\30\22\25\27\22" - "\25\30\21\25\30\22\24\30\202\22\24\27\203\22\25\27\13\22\25\30\21\25" - "\27\21\24\27\22\25\27\21\25\27\22\25\27\22\24\27\22\25\30\22\24\27\22" - "\25\27\22\24\30\202\22\25\27\2\21\25\30\21\25\27\204\22\25\27\202\22" - 
"\25\30\1\22\24\30\202\22\25\27\6\22\25\30\21\25\27\22\25\27\21\25\30" - "\21\25\27\22\24\27\202\22\25\27\10\21\25\27\22\25\27\21\25\27\22\25\27" - "\21\24\27\22\25\27\22\24\30\22\24\27\202\22\24\30\202\22\25\27\202\22" - "\25\30\202\22\25\27\1\21\25\27\202\22\25\27\1\21\25\27\202\22\25\30\202" - "\22\25\27\1\21\25\27\202\22\25\27\10\22\24\27\22\25\30\22\25\27\22\24" - "\27\22\25\27\22\24\27\22\25\27\21\25\30\205\22\25\27\5\21\24\27\22\25" - "\27\22\25\30\22\24\27\21\25\27\202\22\25\27\3\22\24\27\22\25\27\22\25" - "\30\202\22\25\27\5\22\25\30\21\24\27\22\24\27\22\25\30\22\25\27\202\22" - "\24\27\202\22\25\30\1\21\24\27\203\22\25\27\1\22\25\30\202\22\25\27\2" - "\21\25\27\22\25\30\202\22\26\30\202\22\25\30\1\22\26\30\202\22\25\30" - "\1\22\25\31\213\22\25\30\3\22\26\30\22\25\30\22\26\31\202\22\25\30\3" - "\22\26\30\22\25\30\22\25\31\204\22\25\30\7\22\26\30\23\25\31\22\25\30" - "\22\25\31\22\25\30\22\26\30\23\25\30\206\22\25\30\2\22\26\30\22\25\30" - "\202\22\26\30\203\22\25\30\2\22\26\30\23\25\30\202\22\25\30\2\22\26\30" - "\22\25\31\205\22\25\30\202\22\26\30\204\22\25\30\3\22\26\30\22\25\30" - "\22\25\31\202\22\25\30\1\22\25\31\203\22\25\30\1\22\25\31\203\22\25\30" - "\1\23\26\30\202\22\26\30\2\22\25\30\22\26\30\202\22\25\30\1\22\26\30" - "\204\22\25\30\2\22\26\30\22\25\31\205\22\25\30\202\22\26\30\202\22\25" - "\30\1\22\26\30\202\22\25\30\202\22\26\31\5\22\25\31\22\25\30\22\26\30" - "\22\25\30\22\26\30\207\22\25\30\1\22\26\30\202\22\25\30\2\22\26\30\22" - "\25\31\203\22\25\30\1\22\26\30\204\22\25\30\1\22\26\30\204\22\25\30\203" - "\22\26\30\202\22\25\30\1\22\26\30\203\22\25\30\1\22\25\31\210\22\25\30" - "\2\22\26\31\22\25\31\202\22\25\30\202\22\26\30\3\22\25\30\23\25\30\22" - "\25\31\210\22\25\30\202\22\26\30\202\22\25\31\3\22\26\30\22\25\30\22" - "\26\30\205\22\25\30\1\22\26\30\203\22\25\30\204\22\26\30\202\22\25\30" - "\1\22\26\30\203\22\25\30\1\22\25\31\202\22\25\30\1\22\25\31\202\22\25" - "\30\1\22\26\30\202\22\25\30\1\22\25\31\203\22\25\30\1\22\26\30\202\22" 
- "\25\30\1\22\25\31\203\22\25\30\3\23\25\30\22\25\30\22\26\31\202\22\25" - "\30\3\23\25\30\22\25\30\22\26\31\203\22\25\30\2\22\26\30\22\26\31\202" - "\22\25\30\3\22\25\31\22\26\30\23\25\30\205\22\25\30\5\22\26\31\22\25" - "\30\22\25\31\22\25\30\22\26\30\204\22\25\30\1\23\25\30\204\22\26\30\202" - "\22\25\30\3\23\25\30\22\26\30\22\26\31\202\23\26\31\1\22\26\31\202\22" - "\25\31\2\22\26\31\22\25\31\207\22\26\31\2\22\25\31\23\26\31\203\22\26" - "\31\1\23\26\31\204\22\26\31\1\22\25\31\210\22\26\31\3\23\25\31\22\26" - "\31\22\25\31\203\22\26\31\4\22\25\31\23\26\31\22\26\31\22\25\31\221\22" - "\26\31\4\23\26\31\23\25\31\22\25\31\22\26\31\202\23\26\31\207\22\26\31" - "\1\23\26\31\203\22\26\31\1\23\25\31\204\22\26\31\3\23\26\31\22\26\31" - "\22\25\31\205\22\26\31\202\22\25\31\204\22\26\31\202\23\26\31\203\22" - "\26\31\6\22\26\30\22\26\31\23\25\31\22\26\31\23\26\31\22\26\31\203\22" - "\25\31\202\22\26\31\4\23\26\31\22\26\31\23\25\31\23\26\31\203\22\26\31" - "\2\23\26\31\22\26\31\203\22\25\31\203\22\26\31\10\22\25\31\22\26\31\22" - "\25\31\23\26\31\22\26\31\23\26\31\22\26\31\23\25\31\202\22\26\31\202" - "\23\26\31\203\22\26\31\1\22\25\31\203\22\26\31\1\23\26\31\202\22\26\31" - "\1\23\26\31\205\22\26\31\2\23\26\31\22\25\31\203\22\26\31\1\22\25\31" - "\203\22\26\31\2\22\25\31\23\25\31\205\22\26\31\1\22\25\31\202\22\26\31" - "\1\23\26\31\202\22\25\31\2\22\26\31\23\26\31\203\22\26\31\1\22\25\31" - "\202\22\26\31\1\23\26\31\202\22\26\31\202\22\25\31\1\23\26\31\206\22" - "\26\31\2\23\25\31\22\25\31\203\22\26\31\3\23\26\31\22\25\31\22\26\31" - "\202\22\25\31\2\22\26\31\22\25\31\202\22\26\31\1\22\25\31\202\22\26\31" - "\1\23\26\31\205\22\26\31\3\22\25\31\23\26\31\22\25\31\204\22\26\31\202" - "\22\25\31\1\22\26\31\202\23\26\31\202\22\26\31\5\22\25\31\22\26\31\23" - "\25\31\22\26\31\22\26\30\202\22\26\31\202\23\26\31\1\22\26\31\202\22" - "\25\31\203\22\26\31\3\23\25\31\22\26\31\23\26\31\202\22\26\31\12\22\25" - "\31\22\26\31\23\25\31\22\26\31\22\25\31\22\26\31\22\25\31\22\26\32\23" 
- "\26\32\22\27\32\202\23\26\32\202\22\26\32\2\22\27\32\23\26\31\202\22" - "\26\32\12\22\27\31\22\27\32\22\26\31\22\26\32\22\27\32\22\26\31\23\26" - "\32\22\26\31\22\27\32\22\26\32\202\22\27\32\1\22\26\32\202\22\27\32\7" - "\23\26\32\23\27\32\22\27\32\23\26\32\22\26\31\22\27\32\23\26\31\202\23" - "\26\32\2\22\26\31\22\26\32\202\23\26\32\16\22\27\32\23\26\32\22\27\31" - "\22\27\32\22\26\32\22\27\32\22\26\32\22\27\32\22\26\32\22\27\32\22\26" - "\32\23\26\32\22\26\32\22\27\32\202\22\26\32\11\22\27\31\22\26\32\23\26" - "\32\22\26\32\23\26\32\22\26\31\22\27\32\23\27\32\22\27\32\202\22\26\32" - "\202\22\27\32\7\23\27\32\22\27\32\22\26\32\23\26\32\22\27\32\22\26\32" - "\22\27\32\202\23\27\32\202\23\26\32\5\22\26\32\23\26\32\22\27\31\22\27" - "\32\23\26\32\204\22\26\32\4\23\26\32\22\27\32\22\26\32\23\27\32\202\22" - "\27\32\1\23\26\32\202\22\26\32\30\22\26\31\22\27\31\22\27\32\23\27\32" - "\22\26\32\23\26\32\23\27\31\22\27\32\22\26\31\22\27\32\22\26\32\22\27" - "\31\22\27\32\22\26\32\23\27\31\22\26\32\22\27\32\23\27\32\22\26\32\23" - "\27\31\23\27\32\22\27\32\22\26\32\22\26\31\202\22\26\32\6\22\27\32\23" - "\26\32\22\27\31\23\27\32\22\26\32\23\26\32\202\22\26\32\1\23\27\32\202" - "\22\26\32\2\22\27\32\23\27\31\203\23\26\32\5\22\26\32\23\27\32\23\30" - "\32\22\26\32\23\26\32\202\22\27\32\2\22\26\32\22\27\32\202\22\26\32\203" - "\23\26\32\3\22\26\32\23\27\32\23\26\32\202\22\26\32\1\22\26\31\202\22" - "\26\32\10\23\26\32\23\27\32\22\26\31\23\27\32\22\27\31\22\26\32\23\26" - "\32\22\26\32\204\23\26\32\5\22\26\32\23\26\32\22\26\32\22\27\32\22\26" - "\32\203\23\26\32\1\22\26\31\202\22\26\32\5\22\27\32\23\26\32\23\27\32" - "\22\27\31\23\26\31\203\22\26\32\1\23\26\32\202\22\27\32\10\22\26\32\23" - "\26\32\22\26\32\23\26\32\22\26\32\22\27\32\22\26\32\23\26\31\204\22\26" - "\32\10\23\26\32\22\27\32\22\26\32\23\26\31\22\27\32\23\27\32\22\27\32" - "\23\27\32\207\22\26\32\1\23\26\32\203\22\26\32\3\22\27\31\23\26\32\22" - 
"\27\32\203\22\26\32\202\23\26\32\1\22\26\31\203\23\26\32\1\22\26\32\203" - "\23\26\32\1\22\26\32\202\22\27\32\2\23\26\32\22\26\31\202\22\26\32\2" - "\22\27\32\23\27\32\202\23\26\32\2\23\27\32\22\27\32\202\22\26\32\202" - "\23\26\32\202\22\26\32\3\22\27\32\23\27\33\22\27\32\203\22\27\33\203" - "\23\27\33\3\23\27\32\22\27\32\23\27\33\202\22\27\33\3\23\27\33\22\27" - "\33\23\27\33\202\22\27\33\1\23\27\33\202\22\27\33\2\22\27\32\22\27\33" - "\202\23\27\32\202\22\27\33\202\23\27\33\203\22\27\33\1\23\27\33\203\22" - "\27\33\202\23\27\33\1\22\27\32\202\22\27\33\1\22\27\32\202\23\27\33\1" - "\23\26\33\202\22\27\33\4\23\27\33\22\26\33\22\27\33\23\27\33\203\22\27" - "\33\7\23\27\32\23\27\33\22\27\33\23\27\33\22\27\33\22\27\32\23\27\33" - "\202\22\27\33\1\23\27\33\202\22\27\33\1\23\27\32\203\23\26\33\13\23\27" - "\33\22\27\33\22\26\33\23\26\33\22\26\32\23\27\32\22\27\33\23\27\32\22" - "\27\33\22\26\33\22\27\32\202\23\27\33\4\23\27\32\23\27\33\22\27\33\23" - "\27\33\206\22\27\33\10\23\27\32\22\27\33\23\27\33\22\27\33\23\27\33\23" - "\26\33\22\27\33\22\27\32\206\22\27\33\203\23\27\33\202\22\27\33\2\23" - "\27\33\22\27\33\202\23\27\33\4\22\27\32\23\26\33\22\27\33\23\27\33\203" - "\22\27\33\1\23\26\33\203\23\27\33\202\22\27\33\4\23\27\33\22\27\32\22" - "\27\33\23\26\33\203\23\27\33\1\22\27\33\204\23\27\33\7\24\27\34\24\31" - "\35\22\27\33\23\27\32\23\27\33\22\27\33\23\27\33\202\22\27\33\6\23\27" - "\33\22\27\32\22\27\33\23\27\33\22\26\33\23\27\33\202\22\27\33\10\22\27" - "\32\23\27\33\22\27\32\23\27\33\22\27\33\23\27\33\23\26\33\22\27\33\202" - "\23\27\33\1\23\27\32\202\23\27\33\5\23\26\32\23\27\32\23\27\33\22\27" - "\33\22\26\33\202\23\27\33\5\22\26\33\23\27\33\22\27\33\23\27\33\23\26" - "\33\202\23\27\33\4\23\26\33\23\27\32\22\27\33\22\27\32\203\22\27\33\1" - "\22\26\33\203\23\27\33\1\23\26\33\204\22\27\33\202\23\27\33\12\22\27" - "\32\23\26\33\22\27\33\22\27\32\22\27\33\23\27\33\23\27\32\22\27\32\23" - "\27\33\22\27\33\202\23\27\33\6\22\27\33\23\26\32\22\27\33\23\27\33\22" - 
"\27\33\23\27\33\204\22\27\33\2\23\27\33\22\27\33\202\23\27\33\202\22" - "\27\32\5\23\27\33\23\26\33\22\27\32\23\26\33\23\27\33\202\22\27\33\1" - "\23\27\33\204\22\27\33\3\23\27\33\22\27\32\22\27\33\202\23\27\33\205" - "\22\27\33\5\23\26\33\22\26\33\23\27\33\22\27\33\23\26\33\204\22\27\33" - "\2\22\30\34\23\27\34\202\23\27\33\2\22\27\34\23\27\34\202\23\30\34\1" - "\22\27\34\202\23\30\34\7\23\27\34\22\30\34\22\27\34\23\27\34\22\30\34" - "\23\30\33\23\27\33\202\22\27\34\7\23\27\33\22\30\34\23\27\34\23\30\34" - "\22\27\34\23\30\34\23\27\33\202\23\30\33\11\23\30\34\23\27\33\22\30\34" - "\23\27\34\23\30\34\22\30\34\23\30\33\23\30\34\23\27\33\203\23\27\34\7" - "\22\30\34\23\27\34\22\27\33\23\30\33\23\27\33\23\27\34\22\30\34\202\23" - "\30\34\2\22\27\34\22\27\33\202\23\30\34\202\22\27\34\1\23\27\34\202\23" - "\30\34\15\23\27\34\22\30\34\23\30\34\22\27\34\23\30\33\22\27\34\22\30" - "\34\22\30\33\22\27\34\23\30\34\23\27\34\23\30\34\23\27\33\202\23\27\34" - "\1\23\27\33\202\23\27\34\11\23\30\34\22\30\34\23\30\34\22\27\33\22\30" - "\34\22\27\34\23\30\34\23\27\34\22\27\34\203\23\30\34\202\23\27\34\202" - "\23\30\34\202\22\30\34\1\22\27\34\203\23\30\34\202\23\27\34\6\22\30\34" - "\22\27\34\23\30\34\22\30\34\23\27\34\23\27\33\202\23\30\34\7\23\27\33" - "\23\30\34\23\30\33\23\27\33\22\27\34\23\30\33\23\30\34\202\23\27\34\202" - "\22\27\34\14\23\27\34\23\27\33\23\27\34\22\30\34\23\30\33\23\27\34\22" - "\30\33\23\27\34\23\27\33\22\27\34\23\30\34\23\27\34\202\23\30\34\14\23" - "\30\33\23\30\34\23\27\33\22\27\34\23\27\34\24\30\34\22\30\34\26\33\37" - "\24\30\35\26\33\36\22\30\34\23\27\34\202\23\30\34\12\22\27\34\23\30\34" - "\22\30\34\23\30\34\23\27\34\23\30\34\23\27\33\23\30\34\23\27\34\23\27" - "\33\202\22\30\34\202\23\27\34\26\23\30\34\22\30\34\23\30\34\22\27\33" - "\23\27\33\23\30\33\23\27\34\22\27\33\23\27\34\23\27\33\23\30\34\22\27" - "\34\22\30\34\23\30\34\22\30\34\23\27\34\22\27\34\23\27\34\23\30\33\22" - 
"\30\34\22\27\34\23\27\34\202\22\30\34\3\22\30\33\23\27\34\23\30\34\202" - "\22\27\34\6\23\27\34\22\30\34\23\30\34\22\27\34\22\30\34\23\27\33\202" - "\23\30\34\202\23\27\34\11\23\30\34\23\30\33\22\27\34\23\27\34\22\27\34" - "\22\30\34\22\27\34\23\30\34\23\27\34\202\22\30\34\13\23\30\34\23\27\34" - "\22\30\33\23\27\34\22\30\34\23\30\34\23\27\33\22\27\34\22\30\33\23\30" - "\34\22\30\34\202\23\27\34\202\22\30\34\13\23\30\33\22\27\33\22\27\34" - "\23\27\34\22\27\33\23\27\33\22\27\33\23\30\34\22\30\34\23\30\34\23\30" - "\33\202\23\30\34\12\23\30\33\23\27\33\23\27\34\22\27\34\22\30\34\22\27" - "\34\23\30\34\23\27\34\23\30\34\22\27\34\204\23\30\34\10\22\30\34\23\27" - "\34\22\27\34\22\27\33\23\30\34\23\27\33\23\30\34\23\30\33\203\23\30\35" - "\3\22\31\34\23\30\35\23\30\34\203\23\30\35\2\23\30\34\23\31\35\206\23" - "\30\35\2\22\30\35\22\30\34\202\23\30\34\205\23\30\35\2\23\30\34\22\30" - "\35\202\23\30\35\6\23\31\34\22\30\35\23\30\34\22\30\35\23\31\34\22\30" - "\35\211\23\30\35\1\22\30\35\206\23\30\35\2\22\30\35\23\30\34\206\23\30" - "\35\3\22\30\34\23\30\35\23\31\34\203\23\30\34\4\23\30\35\22\30\35\23" - "\30\35\22\30\35\202\23\30\35\202\22\30\35\205\23\30\35\1\23\31\35\203" - "\23\30\35\1\23\30\34\202\23\30\35\3\23\30\34\23\30\35\23\30\34\206\23" - "\30\35\6\22\30\35\22\30\34\23\30\35\22\30\35\23\30\34\22\30\35\202\23" - "\30\35\1\23\30\34\205\23\30\35\1\23\30\34\203\23\30\35\2\23\30\34\23" - "\31\34\202\23\30\35\6\23\30\34\23\30\35\22\30\35\23\31\35\23\31\36\23" - "\30\35\202\22\30\35\203\23\30\35\7\23\31\34\23\30\35\22\30\35\23\31\35" - "\22\30\34\23\30\35\22\30\35\202\23\30\34\10\23\30\35\25\31\36#%\"\40" - "$'\36\"'\23\30\35\24\31\35\24\31\36\204\23\30\35\3\23\30\34\23\30\35" - "\22\31\35\202\22\30\35\2\23\30\34\23\31\35\202\23\30\35\1\23\31\34\204" - "\23\30\35\2\22\31\34\22\30\35\202\23\30\35\202\23\30\34\7\22\30\34\23" - "\30\35\22\30\35\23\30\35\22\30\35\23\30\35\22\30\34\202\23\30\35\2\23" - "\31\34\22\30\34\202\23\30\35\2\22\30\35\23\30\34\202\23\30\35\1\23\31" 
- "\35\203\23\30\35\3\22\30\35\23\31\35\22\30\35\202\23\30\35\1\23\31\35" - "\202\23\30\35\202\23\30\34\202\23\30\35\1\23\31\34\202\23\30\35\3\22" - "\30\35\23\30\35\23\31\35\202\23\30\35\1\22\31\35\204\23\30\35\3\23\30" - "\34\23\30\35\23\31\35\202\23\30\35\1\23\30\34\202\23\30\35\3\23\31\34" - "\23\30\35\23\30\34\204\23\30\35\3\22\30\35\23\31\35\23\30\34\202\23\30" - "\35\2\22\30\34\23\30\34\202\23\30\35\2\23\31\35\22\31\35\204\23\30\35" - "\1\22\30\35\204\23\30\35\2\22\30\35\22\30\34\202\23\30\35\2\22\31\35" - "\23\30\35\203\23\30\34\202\23\30\35\1\22\31\35\202\23\31\36\1\23\31\35" - "\203\23\31\36\2\23\31\35\23\30\36\203\23\31\36\2\23\30\36\23\31\36\203" - "\23\31\35\3\23\30\36\23\31\36\23\31\35\204\23\31\36\1\23\30\36\203\23" - "\31\36\4\23\30\36\24\31\36\23\31\36\23\31\35\203\23\30\36\207\23\31\36" - "\3\23\31\35\23\31\36\23\30\36\203\23\31\36\1\23\31\35\202\23\31\36\202" - "\23\30\36\2\23\31\35\23\30\36\204\23\31\36\202\23\31\35\205\23\31\36" - "\1\23\31\35\202\23\31\36\202\23\31\35\203\23\31\36\3\23\31\35\23\30\35" - "\23\31\35\203\23\31\36\1\23\31\35\202\23\31\36\1\24\31\36\203\23\31\36" - "\202\23\30\36\6\23\31\36\23\31\35\23\31\36\23\30\36\23\31\36\23\31\35" - "\202\23\31\36\202\23\30\36\205\23\31\36\1\23\31\35\210\23\31\36\203\23" - "\31\35\202\23\31\36\3\23\30\36\26\33\40\26\34!\202\24\32\37\24\23\30" - "\35\24\32\36\25\33\40\40%*049059/37-15*/2(-1&+.$(-\"&,\"',\40%*\37%)" - ":>@\202e\21iR\21%*-\202\23\31\36\1\24\31\37\203\23\31\36\202\23\30\36" - "\1\23\31\35\205\23\31\36\1\23\30\36\202\23\31\36\3\23\31\35\23\31\36" - "\23\31\35\202\23\31\36\202\23\31\35\202\23\31\36\2\23\31\35\23\30\36" - "\206\23\31\36\202\23\30\36\5\23\31\36\23\30\36\23\31\36\23\30\35\23\30" - "\36\204\23\31\36\202\23\31\35\1\23\30\36\205\23\31\36\1\23\30\36\202" - "\23\31\36\2\23\30\36\23\31\36\202\23\30\36\4\23\31\36\23\31\35\23\31" - "\36\24\31\36\204\23\31\36\3\23\31\35\23\31\36\23\31\35\203\23\31\36\2" - "\23\31\35\23\30\35\207\23\31\36\1\23\30\36\202\23\31\36\2\23\30\36\23" - 
"\31\35\203\23\31\36\202\23\30\35\1\23\31\35\203\23\31\36\202\23\31\35" - "\202\23\31\36\1\23\30\36\203\23\31\36\20\23\31\35\24\31\35\23\30\35\23" - "\31\35\23\30\36\23\31\36\23\31\35\23\31\36\23\31\35\23\31\36\23\30\36" - "\23\31\36\23\30\36\23\31\35\23\31\36\23\31\37\202\23\32\37\202\23\31" - "\37\6\23\32\37\24\31\36\23\31\37\24\32\37\23\31\37\23\32\37\202\23\31" - "\37\202\23\32\37\203\23\31\37\202\23\32\37\1\23\31\36\202\23\31\37\2" - "\24\31\37\23\31\36\202\23\31\37\6\23\32\37\23\31\36\23\32\36\23\31\37" - "\24\31\37\23\32\37\202\23\31\37\1\23\31\36\202\23\31\37\5\23\32\37\23" - "\31\37\23\31\36\23\31\37\23\32\37\203\23\31\37\4\24\31\36\23\32\37\23" - "\31\37\23\32\37\203\23\31\37\2\23\32\36\23\32\37\202\23\31\37\14\24\32" - "\37\23\32\37\23\31\37\23\32\37\23\32\36\23\31\36\23\31\37\23\32\37\23" - "\31\37\23\32\37\23\32\36\23\31\37\202\23\32\36\2\23\31\37\23\31\36\202" - "\24\31\37\1\23\32\36\202\23\31\37\1\23\32\37\202\23\31\37\14\24\31\37" - "\23\31\37\24\31\37\23\31\36\23\32\37\23\32\36\23\31\37\24\31\37\23\31" - "\37\24\31\37\23\31\36\24\31\36\204\23\32\37\6\23\31\37\24\32\36\23\31" - "\37\23\32\36\23\31\37\23\32\37\202\23\31\37\3\23\32\37\24\32\37\23\31" - "\37\202\24\31\36\1\23\32\37\202\23\31\37\36\23\32\36\23\31\37\23\32\37" - "\24\32\40\26\33!\24\32\37\23\31\37\25\33\37\27\35#\"'+/48:=\77JKKQQQ" - "VWW__`eeeccd`aa]^_TVWKLN>@A@AC@BCKA#\314\226\2\312\226\11(+)\23\32\36" - "\203\23\31\37\1\24\31\37\204\23\31\37\13\23\32\36\23\32\37\24\31\37\24" - "\32\37\24\31\37\23\31\37\23\32\37\23\32\36\23\32\37\24\31\37\23\31\36" - "\202\23\32\37\3\23\31\37\23\32\37\23\31\37\202\24\31\37\206\23\31\37" - "\1\23\32\37\202\23\31\37\14\24\32\37\23\31\36\23\31\37\23\32\37\24\32" - "\36\24\31\37\24\32\37\23\31\36\23\31\37\24\32\37\23\31\37\24\31\37\202" - "\23\31\37\2\24\31\37\23\32\37\202\23\31\37\6\23\32\37\24\31\37\23\31" - "\36\23\31\37\23\32\37\24\32\37\202\23\32\37\2\24\31\36\23\32\37\202\23" - "\31\37\2\23\32\37\23\31\37\202\23\31\36\2\23\31\37\24\32\37\202\23\31" - 
"\37\1\23\32\37\202\23\31\37\1\23\32\37\202\23\31\36\7\23\32\37\24\31" - "\37\23\31\37\23\32\37\23\31\36\23\32\37\24\32\37\207\23\31\37\2\23\32" - "\37\23\31\36\202\23\32\37\1\23\31\37\202\23\32\37\202\23\31\37\203\23" - "\31\36\11\23\31\37\24\32\37\23\31\37\23\32\37\23\31\37\24\32\37\23\32" - "\37\23\31\37\23\32\37\202\23\31\36\1\23\31\37\202\23\32\37\4\24\32\40" - "\23\32\40\23\32\37\24\32\40\202\23\32\40\1\23\32\37\202\24\32\40\203" - "\23\32\40\1\23\32\37\213\23\32\40\1\24\32\40\202\23\32\40\1\24\32\40" - "\202\23\32\40\202\24\32\40\202\23\32\40\4\24\32\40\24\32\37\23\32\40" - "\24\32\40\202\23\32\40\202\24\32\40\206\23\32\40\203\24\32\40\203\23" - "\32\40\1\24\32\40\202\23\32\40\5\24\32\40\23\32\40\24\32\40\23\32\40" - "\24\32\40\203\23\32\40\203\24\32\40\6\23\32\40\23\32\37\23\32\40\24\32" - "\40\23\32\40\23\32\37\204\23\32\40\1\24\32\40\207\23\32\40\6\24\32\40" - "\23\32\40\24\32\40\23\32\40\23\32\37\24\32\40\203\23\32\40\202\23\32" - "\37\203\24\32\40\204\23\32\40\1\24\33\40\204\23\32\40\1\23\32\37\205" - "\23\32\40\15\23\32\37\23\32\40\24\33!\26\34\"\34#)&+/&),78:abb\221\221" - "\221\275\275\275\344\344\344\357\357\357\205\361\361\361\16\357\357\357" - "\350\350\350\321\321\321\271\271\271SQI\201_\3\355\256\2\323\234\11/" - "+\36\40%*\25\34\"\24\32\40\23\32\40\23\32\37\202\23\32\40\203\24\32\40" - "\4\23\32\40\24\32\40\23\32\40\24\32\40\205\23\32\40\1\24\32\40\204\23" - "\32\40\1\24\32\40\202\23\32\40\1\24\32\37\202\23\32\40\202\23\32\37\205" - "\23\32\40\202\24\32\40\3\23\32\40\23\33\40\24\32\40\202\23\32\40\1\24" - "\32\40\202\23\32\40\10\24\32\40\23\32\40\24\32\40\23\32\37\24\32\37\23" - "\32\37\23\32\40\24\32\40\203\24\32\37\6\23\32\40\24\32\40\23\32\40\24" - "\32\40\23\32\37\23\32\40\204\24\32\40\1\23\32\40\202\24\32\40\202\23" - "\32\40\1\24\32\40\202\23\32\40\1\24\32\40\203\23\32\40\2\24\32\40\23" - "\32\37\203\23\32\40\1\24\32\40\204\23\32\40\2\24\32\37\23\32\40\202\24" - "\32\40\204\23\32\40\202\24\32\40\202\24\32\37\6\24\32\40\23\32\40\23" - 
"\32\37\24\32\40\23\32\37\24\32\40\207\23\32\40\2\23\32\37\24\32\40\203" - "\23\32\40\204\24\33!\2\24\33\40\23\33!\203\24\33!\3\24\32!\23\33!\24" - "\32!\202\24\33!\4\23\33!\24\32!\23\33!\23\32\40\202\24\33!\12\23\32!" - "\24\32!\23\33!\24\32!\24\33!\23\32!\23\33!\23\33\40\24\32!\24\33!\202" - "\23\33!\3\24\33!\23\33!\23\33\40\202\23\33!\2\23\32!\23\33\40\202\24" - "\33!\1\24\32\40\202\24\33!\2\23\33!\23\33\40\202\24\33!\1\24\33\40\204" - "\24\33!\3\23\32!\24\32!\23\32\40\202\23\33!\2\24\33!\24\33\40\203\23" - "\32!\1\24\33!\202\23\33!\3\24\33!\23\33\40\23\32!\204\23\33!\2\24\32" - "!\23\33!\203\24\33!\7\23\33!\23\33\40\24\32!\23\33!\24\32!\23\33!\24" - "\33!\202\24\32!\20\23\32!\24\33!\24\32!\23\33!\24\33!\24\32!\23\32!\24" - "\33!\24\33\40\24\33!\23\33\40\24\33!\23\33!\24\32!\24\33!\23\33!\203" - "\23\32!\3\23\33!\23\32!\24\33!\203\23\33!\20\24\32!\23\33!\23\32!\24" - "\32!\27\37$&,1,04035\\]]\225\225\225\312\312\312\357\357\357\362\362" - "\362\363\363\363\367\367\367\365\365\365\204\361\361\361\17\360\360\360" - "\356\356\356\346\346\346\335\335\335\276\276\276XJ$\273\211\0\361\261" - "\3\323\235\11""3+\26*,-(+0\35#)\27\36$\24\33\"\202\24\33!\5\23\32!\23" - "\33!\24\33!\23\33!\24\33!\202\23\33!\4\23\32!\24\33\40\24\33!\24\32!" - "\202\23\33!\1\23\33\40\202\24\33!\4\24\33\40\23\32!\24\33!\24\32!\202" - "\24\33!\6\23\33!\24\32!\24\33!\23\33!\24\32!\23\32!\202\23\33!\3\23\32" - "!\24\32!\24\33!\202\23\33!\1\24\33!\202\23\32!\2\24\32\40\24\33!\202" - "\24\32!\5\23\32!\24\32\40\24\33!\23\33!\24\33!\202\23\33!\2\24\33!\24" - "\33\40\203\24\33!\3\23\32!\24\33!\23\32!\202\24\32!\12\24\33!\23\33!" - "\23\32!\24\33!\23\33!\24\33!\23\33\40\24\33!\23\33!\24\33!\203\23\32" - "!\2\23\33!\24\33!\202\23\33!\1\24\33!\202\23\32!\3\24\33!\24\32!\23\33" - "!\203\24\33!\202\23\33!\4\24\33!\23\33!\23\32!\23\33!\203\24\33!\202" - "\24\32!\13\24\33!\24\33\40\24\33!\24\32!\24\33!\23\33!\24\32!\23\33!" 
- "\23\33\40\24\32!\23\33!\202\24\33!\4\23\33!\24\33!\23\33!\24\33\40\202" - "\24\33\"\16\24\34\"\23\33\"\24\34\"\23\34\"\24\33\"\23\33\"\23\34\"\24" - "\34\"\24\33#\24\33\"\24\34\"\23\33\"\23\34\"\24\33\"\203\23\33\"\2\24" - "\33\"\23\33\"\202\24\33\"\202\24\34\"\202\24\33\"\1\23\34\"\202\24\33" - "\"\202\24\34\"\2\24\33\"\23\33\"\204\24\33\"\1\24\34\"\203\24\33\"\5" - "\24\34\"\24\33\"\23\33\"\24\33\"\24\34\"\203\24\33\"\7\23\33\"\24\33" - "\"\23\33\"\24\33\"\23\34\"\24\33\"\24\34\"\204\24\33\"\205\24\34\"\205" - "\24\33\"\4\23\33\"\24\34\"\24\33\"\23\33\"\202\24\34\"\1\23\33#\202\24" - "\33\"\5\24\34\"\23\34\"\24\33\"\23\33\"\23\34\"\203\24\33\"\13\23\33" - "#\24\33\"\23\34\"\24\33\"\23\33\"\23\34\"\23\33\"\24\34\"\24\33\"\23" - "\34\"\24\33\"\202\24\34\"\2\23\33\"\24\34\"\207\24\33\"\1\24\34\"\202" - "\24\33\"\17\23\33\"\23\34\"\33!)-36678jjk\261\261\261\345\345\345\365" - "\365\365\367\367\367\370\370\370\372\372\372\370\370\370\372\372\372" - "\373\373\373\204\371\371\371\24\367\367\367\363\363\363\351\351\351\333" - "\333\333vtp\213h\7\327\236\0\367\266\4\325\236\11VM6\215\215\215fgg*" - "-0\37$*\27\36%\25\34#\23\33\"\24\33\"\23\34\"\23\33\"\202\24\33\"\1\24" - "\34\"\202\24\33\"\5\24\34\"\24\33\"\23\33\"\24\33\"\24\34\"\202\24\33" - "\"\1\23\33\"\203\24\33\"\1\23\33\"\203\24\33\"\1\24\34\"\202\24\33\"" - "\1\24\34\"\203\24\33\"\1\24\34\"\206\24\33\"\1\24\34\"\206\24\33\"\6" - "\24\34\"\23\33\"\24\33\"\24\34\"\24\33\"\23\33\"\202\24\33\"\1\23\34" - "\"\202\24\33\"\1\23\34\"\202\24\33\"\11\23\34\"\23\33\"\24\34\"\23\34" - "\"\24\33\"\24\34\"\24\33\"\23\34\"\24\34\"\202\24\33\"\203\23\33\"\203" - "\24\33\"\1\23\33\"\204\24\33\"\202\24\34\"\2\23\34\"\24\34\"\202\23\33" - "\"\1\24\34\"\202\24\33\"\7\23\33\"\24\33\"\23\34\"\24\34\"\23\33\"\24" - "\34\"\23\33\"\202\24\33\"\4\24\34\"\24\33\"\23\33\"\24\33\"\202\24\34" - "\"\11\24\33\"\23\34\"\24\33\"\23\33\"\24\33\"\23\34\"\23\33\"\24\33\"" - "\24\34\"\214\24\34#\3\24\35#\24\34#\24\34$\203\24\34#\1\24\34$\202\24" - 
"\34#\1\24\34$\204\24\34#\1\24\34$\216\24\34#\1\24\34$\203\24\34#\2\23" - "\34#\24\35#\202\24\34#\1\24\34$\204\24\34#\1\23\34$\203\24\34#\1\24\34" - "$\202\24\34#\1\23\34$\203\24\34#\2\24\34$\23\34#\203\24\34#\1\24\34$" - "\202\24\34#\1\24\34$\206\24\34#\1\24\34$\203\24\34#\3\24\35#\24\34#\24" - "\34$\207\24\34#\1\24\34$\204\24\34#\1\23\34#\202\24\34#\2\24\35$\24\34" - "$\211\24\34#\7\26\36%$*0$')NOO\262\262\262\355\355\355\374\374\374\202" - "\371\371\371\4\373\373\373\372\372\372\375\375\375\376\376\376\205\377" - "\377\377\22\376\376\376\373\373\373\366\366\366\355\355\355\303\303\303" - "aU5\260\201\1\345\247\0\372\270\4\331\242\11_S3\254\254\254\332\332\332" - "\250\250\251VWY\35\"&\31!'\25\35$\204\24\34#\1\24\35$\206\24\34#\1\23" - "\34#\211\24\34#\1\23\34#\204\24\34#\1\23\34#\206\24\34#\2\24\34$\24\34" - "#\202\23\34#\202\24\34#\1\24\34$\202\24\34#\2\24\34$\23\34#\203\24\34" - "#\1\24\34$\203\24\34#\1\24\34$\205\24\34#\4\23\34#\24\35#\24\34#\24\34" - "$\202\24\34#\3\24\35#\24\34#\24\34$\213\24\34#\3\24\34$\24\34#\24\34" - "$\206\24\34#\3\24\34$\23\34#\24\34$\203\24\34#\3\24\34$\24\34#\24\35" - "$\204\24\34#\202\24\34$\202\24\34#\1\24\34$\207\24\34#\7\23\34#\24\34" - "#\24\35$\24\34$\24\35$\24\34$\25\34%\202\24\35$\7\24\34$\24\35%\24\35" - "$\25\35$\24\35$\24\34$\25\35$\206\24\35$\3\24\35%\25\34$\24\35%\203\25" - "\35$\4\24\35$\24\34$\24\35$\25\35%\204\24\35$\1\24\34$\202\24\35$\4\25" - "\35%\25\35$\24\35$\25\35$\202\24\35$\6\24\34$\24\35$\25\35$\24\35$\25" - "\35$\24\34$\202\24\35%\202\24\35$\202\24\35%\207\24\35$\5\24\35%\24\34" - "$\24\35$\24\34$\24\34%\203\24\35$\6\24\35%\25\34%\24\35%\24\35$\24\34" - "$\25\34$\202\24\34$\3\24\35$\24\34$\25\35$\203\24\34$\1\24\35$\202\24" - "\35%\202\24\35$\203\24\34$\4\24\35%\24\35$\24\35%\24\34$\205\24\35$\5" - "\24\35%\24\35$\24\35%\24\34%\24\35$\202\24\34$\1\25\34%\202\24\34$\7" - "\24\35$\33#++05467\200\201\201\323\323\323\372\372\372\202\376\376\376" - 
"\202\374\374\374\202\376\376\376\207\377\377\377\33\375\375\375\371\371" - "\371\363\363\363\352\352\352\237\237\236jS\22\300\214\0\354\255\0\372" - "\270\4\340\247\12hW)\247\247\247\340\340\340\341\341\341\326\326\326" - "}~\177+/2\35$*\26\37&\24\34$\25\35$\24\35$\24\34$\24\34%\24\34$\24\35" - "$\24\35%\202\24\35$\1\24\34$\202\24\35$\11\24\34%\24\34$\25\34$\24\35" - "$\24\34%\24\34$\24\35$\25\35$\24\35$\202\24\34$\1\24\35%\202\24\35$\202" - "\24\34$\10\24\34%\24\35$\24\34$\25\35%\24\35%\24\34$\24\35$\24\34%\203" - "\24\35$\4\24\34$\24\34%\24\35$\25\34$\206\24\35$\1\24\34%\202\24\35$" - "\1\24\34%\202\24\35$\1\25\35$\202\24\35$\4\24\34$\24\35%\24\34%\24\34" - "$\202\24\35$\1\25\35$\202\24\35$\10\24\34%\24\35%\24\34%\24\35%\24\35" - "$\24\35%\24\34$\25\35%\202\24\35$\2\24\34$\24\35$\202\24\35%\203\24\35" - "$\1\24\34%\202\24\35$\3\24\35%\24\34$\24\34%\203\24\35$\4\25\35$\25\35" - "%\24\35$\25\35%\202\24\35$\1\24\34$\202\25\35$\2\24\35$\24\35%\202\24" - "\35$\2\25\35$\24\35%\202\24\34$\11\25\35$\24\35%\24\36%\24\35%\25\35" - "%\24\35%\24\35&\24\35%\25\35%\202\24\36%\202\25\35%\202\24\35%\202\24" - "\35&\5\24\36%\25\36&\24\36%\24\35%\24\35&\202\24\36&\25\24\36%\24\35" - "%\25\36&\24\36&\24\35%\24\35&\24\35%\24\36%\25\36%\24\35%\24\36%\24\35" - "%\24\36%\25\36%\24\35%\24\35&\24\36%\24\35%\24\36&\24\35%\24\36%\202" - "\24\36&\7\24\36%\24\36&\25\36%\25\35%\24\36&\24\35%\25\36%\202\24\35" - "%\11\25\36&\24\35&\25\36%\24\36%\24\35&\24\36%\24\35%\24\36%\24\35&\203" - "\24\36&\7\24\36%\25\35&\24\35%\25\36%\25\35&\24\35%\24\36%\202\24\35" - "%\7\25\36&\24\36%\24\35%\24\35&\24\36%\24\35%\24\35&\202\24\35%\5\24" - "\36&\25\36%\25\35%\24\35&\24\35%\202\24\36&\3\24\35%\24\36%\25\35%\202" - "\24\35%\7\25\36%\24\35%\25\36&\24\35%\24\36&\25\35&\25\35%\202\24\36" - "&\204\24\35%\10\26\36&\"*1&+/ABC\254\254\254\360\360\360\377\377\377" - "\376\376\376\214\377\377\377!\376\376\376\374\374\374\367\367\367\357" - "\357\357\337\337\337|xl}\\\3\315\226\0\362\261\0\372\270\4\341\247\11" - 
"jX)\253\253\253\350\350\350\353\353\353\347\347\347\332\332\332\242\243" - "\243CEG\34#)\26\37(\24\35%\24\36&\25\35%\24\35%\24\36&\24\36%\24\35&" - "\25\35&\25\36&\24\36&\24\35%\24\36&\202\24\35%\2\24\36%\24\36&\202\25" - "\35&\202\24\35%\2\25\35%\24\35%\202\24\36&\2\24\36%\24\35&\202\24\35" - "%\11\24\36%\24\35&\24\35%\24\36%\24\35%\24\36&\24\35%\24\36&\25\35%\202" - "\24\35%\1\24\36&\202\24\36%\16\24\35%\24\35&\24\36&\24\36%\25\35%\24" - "\35&\24\35%\25\35&\24\35%\24\35&\24\36%\25\35%\24\35%\25\35%\203\24\35" - "%\12\24\35&\25\36%\24\35%\24\36%\25\35%\25\35&\25\36%\24\35%\25\35%\24" - "\35%\202\24\35&\202\24\36&\3\25\36&\24\35&\25\35%\202\25\35&\2\25\36" - "&\25\35%\202\24\35%\1\24\35&\204\24\35%\2\25\35&\24\35&\202\25\35&\5" - "\24\35%\25\36%\25\35%\25\36&\24\36&\202\24\36%\203\25\35%\3\25\36%\24" - "\35%\25\36%\203\24\36&\202\24\35%\11\24\35&\24\36%\25\37&\24\36'\25\36" - "&\25\36'\24\36&\25\36&\25\36'\202\25\36&\202\24\36&\6\25\36'\24\36'\25" - "\37&\25\36&\25\36'\24\37'\203\25\36&\3\24\36'\25\36&\24\36&\202\24\36" - "'\1\25\36'\202\25\36&\2\24\36&\25\36&\203\25\36'\202\25\36&\5\24\36'" - "\24\36&\25\36&\25\36'\25\36&\202\24\36&\202\25\36'\5\25\36&\25\36'\25" - "\37&\25\36&\24\36'\202\25\36&\3\24\36'\25\37&\25\36&\202\25\36'\3\24" - "\36'\25\36'\24\36'\202\25\36&\16\24\36'\25\36&\24\36'\25\36&\25\36'\25" - "\36&\24\36&\25\36&\25\36'\24\36'\25\37'\24\37'\25\36&\25\36'\203\25\36" - "&\7\24\36&\25\36&\24\36&\24\36'\25\36&\24\36'\24\36&\202\24\36'\203\25" - "\36&\2\24\37&\25\36&\202\24\36&\2\25\36&\24\37'\202\25\36'\21\24\37&" - "\25\36&\24\36'\25\36&\25\36'\24\36'\24\36&\25\37'\24\36&\25\36'\25\36" - "&\26\37'-4;269QQR\277\277\277\366\366\366\217\377\377\377\31\376\376" - "\376\373\373\373\364\364\364\354\354\354\326\326\326d\\D\217i\2\325\233" - "\0\364\263\0\372\270\4\351\256\11s\\\36\244\244\244\360\360\360\366\366" - "\366\356\356\356\344\344\344\335\335\335\271\272\272OQR\40&+\30!)\24" - 
"\36'\24\36&\25\37'\202\25\36'\1\25\37'\202\25\36'\2\24\36'\25\36'\202" - "\24\36&\202\25\36'\6\25\36&\24\36'\24\36&\24\36'\25\36'\24\36'\202\25" - "\36'\4\24\36'\25\36&\24\36'\25\36'\202\25\36&\1\24\36&\203\25\36'\3\24" - "\37'\24\36&\25\36&\202\24\36&\1\24\36'\203\25\36'\203\25\36&\202\24\36" - "'\23\25\36&\25\36'\25\36&\24\37'\25\36'\25\36&\24\36'\25\36&\24\36'\25" - "\36&\25\36'\24\37&\25\36&\24\36&\25\36&\25\36'\25\37&\25\36'\24\36'\202" - "\25\36&\202\25\36'\13\24\36&\25\36'\24\36'\25\36&\24\37&\25\37'\25\36" - "&\24\36&\25\36&\24\36&\24\36'\202\25\36&\202\25\36'\3\25\36&\25\37&\24" - "\36'\202\24\36&\5\24\36'\24\36&\25\36'\24\36&\25\36'\205\24\36&\203\25" - "\36'\1\25\36&\202\25\36'\3\24\36'\24\37&\25\36'\202\24\36'\2\24\37(\25" - "\37'\204\25\37(\4\25\37'\24\37'\25\37(\24\37'\202\25\37(\14\24\37(\25" - "\36(\25\37(\24\37(\25\37(\24\37(\25\37(\24\37(\25\37(\24\37(\25\37'\25" - "\37(\202\25\36(\203\25\37(\3\24\37'\25\37(\25\37'\202\25\37(\1\24\36" - "(\202\25\37(\1\25\36(\202\25\37'\2\25\37(\25\36(\202\25\37'\204\25\37" - "(\3\25\37'\25\37(\25\37'\203\25\37(\2\24\37(\25\37'\203\25\37(\1\25\36" - "(\203\25\37(\202\24\37(\202\25\37(\4\24\37(\25\37(\25\37'\24\37'\202" - "\24\37(\14\25\36'\25\37(\25\36(\24\37(\25\36(\25\37(\25\36(\25\37(\25" - "\36'\25\37(\25\36'\24\36(\202\25\37(\3\25\36(\25\37(\25\36'\202\25\37" - "(\2\24\37'\24\37(\206\25\37(\3\24\37(\25\36'\25\37(\202\24\37(\2\25\37" - "(\25\37'\202\25\37(\5""2:A7:<\\\\\\\324\324\324\375\375\375\220\377\377" - "\377\30\376\376\376\372\372\372\362\362\362\351\351\351\322\322\322b" - "X\77\230p\2\335\242\0\367\265\0\372\270\3\355\261\11w^\30\235\235\235" - "\363\363\363\376\376\376\371\371\371\355\355\355\345\345\345\341\341" - "\341\303\303\303_`a\35$*\26\37)\24\37'\203\25\37(\2\24\37'\24\36(\202" - "\25\37(\202\25\37'\4\24\37(\25\37'\24\37(\25\37'\202\25\37(\3\24\37'" - "\25\37'\25\37(\203\25\37'\2\25\36(\24\37(\202\25\37'\2\24\36(\25\37'" - 
"\202\24\37(\2\25\37'\24\37(\210\25\37(\203\25\37'\203\24\37(\4\25\36" - "(\25\37(\24\37(\25\36'\202\25\37(\6\24\37(\25\36(\24\37(\25\37'\25\37" - "(\25\37'\202\25\37(\13\25\37'\25\37(\24\36(\25\37'\24\37'\25\37(\25\36" - "'\25\37(\25\37'\25\37(\25\37'\202\25\37(\3\24\36(\25\37(\24\37(\202\25" - "\37(\203\25\37'\2\25\37(\24\37(\203\25\37(\202\24\37(\6\24\36(\25\37" - "'\25\37(\24\37'\25\37(\24\36(\202\25\37(\202\25\36(\5\25\37'\25\37(\25" - "\37'\24\37'\25\36(\202\25\37(\5\24\37(\25\36(\25\37'\25\37(\25\40(\204" - "\25\37)\2\25\40)\25\37)\203\25\40)\1\25\37)\203\25\40)\14\25\37)\25\40" - ")\25\37)\25\40)\24\36(\24\36'\24\35&\23\35&\23\34%\23\35%\22\34$\23\34" - "$\202\23\35%\6\23\35&\24\36'\24\36(\25\37(\25\37)\25\40)\202\25\37)\5" - "\25\40)\24\37(\25\37)\25\40)\25\37)\204\25\40)\1\25\37)\202\25\40)\5" - "\24\37(\24\36'\23\36&\23\34%\23\35%\202\23\34%\3\23\34&\24\36'\24\37" - "(\202\25\40)\202\25\37)\1\25\40)\202\25\37)\2\25\40)\25\37)\203\25\40" - ")\202\25\37)\1\25\40)\202\25\37)\1\25\40(\203\25\37)\2\25\40)\25\40(" - "\202\25\37)\3\24\37'\24\36'\23\35&\204\23\35%\6\23\36&\23\35'\25\36(" - "\24\40)\25\40)\24\40)\203\25\40)\1\25\37)\205\25\40)\1\25\37)\202\25" - "\40)\5#.6>@BJJJ\300\300\300\376\376\376\221\377\377\377\30\375\375\375" - "\370\370\370\360\360\360\347\347\347\277\277\276SA\21\270\206\0\345\250" - "\0\363\262\0\370\266\1\357\261\10z_\25\235\235\235\363\363\363\377\377" - "\377\376\376\376\370\370\370\357\357\357\346\346\346\335\335\335\272" - "\272\272MOQ\34%,\26!*\202\25\37)\2\25\40)\25\40(\203\25\40)\1\25\37)" - "\203\25\40)\203\25\37)\1\25\37(\202\25\40)\203\25\37)\11\25\36'\24\37" - "'\23\34&\23\33$\22\33#\21\32\"\21\32!\21\31\40\20\30\40\202\20\31\40" - "\10\21\31\40\21\31!\21\32\"\21\33#\23\35%\24\36&\24\36(\25\40)\202\25" - "\37)\203\25\40)\2\25\37)\25\40)\204\25\37)\202\25\40)\2\25\37)\24\37" - ")\202\25\37)\15\25\40)\25\36(\24\36'\23\35&\23\33$\22\33#\21\32\"\21" - "\31!\21\30\40\20\31\40\20\31\37\20\30\40\21\31\40\202\21\31!\5\21\33" - 
"\"\22\34#\23\34%\24\35&\24\36'\202\25\37)\14\25\40)\25\37)\25\40)\25" - "\37)\25\40(\25\37)\25\40)\25\37)\24\37(\24\36&\23\35%\22\34$\202\22\33" - "#\6\22\33$\23\34%\24\36'\24\36(\25\40)\25\37)\205\25\40)\202\25\37)\203" - "\25\40)\10\25!*\25\40*\26\40*\25\40*\25!*\26\40*\25!*\25\40*\202\25!" - "*\202\25\40*\1\25\40+\202\26\40*\202\25\40*\20\24\37)\23\35'\22\34$\21" - "\32\"\20\30\37\17\26\35\17\26\34\16\25\34\16\26\34\17\26\35\17\27\36" - "\20\31\40\21\32#\23\34%\24\36'\25\40)\202\25\40*\1\25!*\203\25\40*\33" - "\25!*\25\40*\26\40*\26\40+\25!*\25\40*\25!*\25\40*\24\40*\24\35'\22\34" - "$\20\31!\17\27\36\17\26\35\16\26\35\17\27\36\21\31\40\21\33#\23\36&\25" - "\37(\25\40*\26\40*\25\40*\26!+\25\40*\26\40+\25!+\202\25\40*\1\25!*\202" - "\25\40*\3\26\40*\25\40*\25!*\202\25\40+\3\25\40*\25\40+\25\40*\202\25" - "!*\6\25\40*\25\37)\24\36'\21\33#\20\30\40\17\26\35\202\16\26\34\15\17" - "\27\36\21\31\40\22\33$\24\36'\25\37)\25\40*\25!*\25!+\25\40*\25\40+\25" - "\40*\26!*\25!*\202\25\40*\1\25\40+\202\25\40*\7\32$.FJN777\242\242\242" - "\360\360\360\375\375\375\376\376\376\220\377\377\377\16\374\374\374\366" - "\366\366\356\356\356\345\345\345\211\207\200xY\3\323\232\0\353\254\0" - "\316\227\1\314\226\1\351\254\4x_\32\244\244\244\364\364\364\202\377\377" - "\377\10\376\376\376\372\372\372\357\357\357\344\344\344\334\334\334\261" - "\261\261/36\32$-\202\25\40*\4\25!*\25\40*\26\40+\25\40+\202\25\40*\2" - "\26\40*\25!*\202\25\40*(\25!*\25\40*\25\40+\26\40*\25!+\25\40*\26!*\25" - "\40*\24\37(\22\34%\20\31!\16\25\33\13\21\27\12\17\24\10\15\21\10\14\20" - "\7\13\17\7\13\16\7\13\17\10\14\20\11\16\22\12\20\24\14\23\31\17\26\35" - "\21\32#\23\36&\25\40)\25!*\25\40*\25!*\25\40+\25!*\26!*\26\40*\26\40" - "+\25!*\25\40*\26\40+\25!*\25\40*\202\25\40+\27\25\40*\25!*\25\40)\24" - "\36(\22\34$\20\30\40\16\25\33\14\22\27\12\17\24\11\15\21\10\14\17\10" - "\13\16\7\13\16\7\13\17\10\14\17\11\15\20\11\16\22\12\20\25\14\23\31\16" - 
"\26\35\21\32\"\23\34%\24\37(\202\26\40*\1\25!+\202\25!*\203\25\40*\14" - "\25\37)\23\35&\21\32#\16\26\35\15\24\32\14\23\30\15\23\30\15\24\32\17" - "\26\35\21\32#\23\35'\25\37)\202\25\40*\1\26\40+\202\25\40*\35\25!*\25" - "\40+\25!*\25\40*\26!*\25\40*\25!,\26!,\25!,\25!+\26!,\25!,\26!+\25!," - "\26!+\26!,\26!+\25!,\25!+\25\40*\25\40)\23\36(\22\34%\20\31!39>06:`c" - "f^ac\203\204\206\203\202\204\205\17z|}{|~VZ]Y\\`&,2'/5\21\32#\22\35&" - "\24\37)\24\40)\26!*\26!,\25!+\26!,\26!+\202\25!,\26\25!+\26!*\25\40*" - "\24\36'\22\33$.6<,27tvysuv\202\204\205\203\204\205vxzwz|8=A:@E\21\32" - "\"\22\35%\25\37)\25\40*\25\",\25!+\25!,\203\25!+\3\25!,\26!+\26!,\202" - "\25!+\16\26!+\25!+\26!,\25!,\25!+\25!,\25\",\26!,\26!+\25\37)\22\33$" - "fkndgi\203\205\206\202\202\204\205\10\203\205\207X\\_[_c\22\34$\24\37" - "*\26\",\25!+\26!+\204\25!+\15\26!+\25!,\26!+\25!,\25!+\27\"-/7<81\32" - "KJG\240\240\240\320\320\320\360\360\360\373\373\373\217\377\377\377\17" - "\376\376\376\373\373\373\365\365\365\354\354\354\336\336\336PI6\266\206" - "\1\356\256\1\256\201\6\\J\30sU\3\264\204\1r_,\270\270\270\366\366\366" - "\203\377\377\377\10\376\376\376\372\372\372\356\356\356\343\343\343\330" - "\330\330\217\220\221#*0\27#.\202\25!+\1\26!+\202\25!,\5\25!+\26!+\25" - "!,\25!+\26!,\202\25!,\2\26!,\25!+\202\25!,\7\25\40*\24\37(\22\35&\40" - "'/\34$({}\200yz{\202\310\310\310\202\362\362\362\204\377\377\377\202" - "\325\325\325\10\232\232\233\233\235\235+04.5:\21\33#\24\36(\24\40)\26" - "!*\202\26!+\202\25!+\20\26!+\26\"+\25!,\26!+\25\"+\25!+\26!+\25!,\25" - "\40+\25\40(\22\34%\20\31!INSGJM\247\250\251\246\246\247\202\331\331\331" - "\204\377\377\377\202\367\367\367\202\320\320\320\12\232\232\233\233\234" - "\2357;\77:\77E\20\32\"\22\35&\25\37*\25\40+\26!+\26!,\202\25!+\6\25\40" - "*\24\37(4@777\202\277\277" - "\277\216\377\377\377\202\256\256\256\16'(),/3\16\25\35\21\34%\24!,\25" - "\".\26$/\26#/\26#.\25\",\21\33%\13\22\30pqrooo\202\373\373\373\210\377" - 
"\377\377\202\246\246\246\4\22\25\27\30\37%\22\34&\24!,\206\26#/\1\26" - "$/\203\26#/\3\26$/\26#0\26$/\202\26#/\1\26$/\202\26#/\2\23\36(\14\23" - "\31\202\273\273\273\204\377\377\377\7\242\242\242\243\243\244\15\25\34" - "\23\36*\26#/\26#0\26$/\202\26#/\203\26$/\202\26#0\20\26$/\35(3NC\31\274" - "\211\0\350\252\0\372\270\2\372\271\7\350\255\13_L\31][V\244\244\244\332" - "\332\332\352\352\352\366\366\366\374\374\374\376\376\376\210\377\377" - "\377\16\376\376\376\372\372\372\365\365\365\356\356\356\273\273\273g" - "W)\307\222\1\366\265\4oS\7\245\244\242\354\354\354\372\372\372\371\371" - "\371\363\363\363\202\376\376\376\206\377\377\377\10\373\373\373\354\354" - "\354\335\335\335\317\317\317{}\177\37(1\31&2\26$/\205\26#/\202\26$/\203" - "\26#/\6\25\".\24!+\20\32\"\13\22\30SUVQQQ\202\352\352\352\216\377\377" - "\377\202\373\373\373\202\200\200\200\4\10\15\22\16\27\36\23\36)\25!." - "\202\26#/\1\26$0\202\26#/\7\26$/\26#/\26#.\24!+\21\33$\32!&\25\27\30" - "\202\263\263\263\222\377\377\377\202\235\235\235\12\22\25\27\30\37%\22" - "\34&\24\40+\26$/\26#/\25\"-\23\37(EKO>\77@\206\377\377\377\10&),.6<\24" - "\40+\25#.\26#0\26#/\26$/\26#0\202\26$/\3\26#/\26$/\26#/\204\26$0\3\26" - "%1\26$0\26%0\202\27$0\6\26$0\25#0\23\40+\20\32#psvlmn\202\373\373\373" - "\222\377\377\377\202\367\367\367\4XZ\\]bf\22\36&\24\"-\202\27$0\4\25" - "\".\23\36*QW[KMO\216\377\377\377\7\304\304\304\305\306\307\22\33#\27" - "#-\27%0\27$0\26$0\202\27$1\1\26%0\202\26$0\7\27%0\27%1\27$0\26$0\26$" - "1\27%0\26$1\202\26%0\3\26$0\23\37)\13\22\31\202\273\273\273\204\377\377" - "\377\13\242\242\242\243\243\243\15\25\34\23\37)\26$0\26$1\26$0\26$1\26" - "%0\27$1\27$0\203\26$0\23\26%1%-3\200_\5\270\207\0\340\245\0\370\266\0" - "\372\270\3\371\270\6\353\256\11\300\220\14oW\26^WD\226\225\220\304\304" - "\304\342\342\342\360\360\360\371\371\371\375\375\375\376\376\376\205" - "\377\377\377\15\375\375\375\370\370\370\362\362\362\350\350\350\213\210" - 
"\200~^\6\346\251\1\323\234\6eW1\314\314\314\364\364\364\374\374\374\376" - "\376\376\211\377\377\377\11\376\376\376\366\366\366\344\344\344\330\330" - "\330\265\265\2658=A\35)4\26$1\26$0\202\27$1\5\26$0\26%1\26$0\27$1\27" - "$0\202\26%0\4\24\40,\21\33%pswklm\226\377\377\377\5\231\231\231\233\235" - "\237\16\30\37\23\37)\25#/\204\26$0\5\27$1\25#0\25!-!*2\32!&\202\331\331" - "\331\226\377\377\377\12\304\304\304\305\306\307\27\37'\33(2\26#/\26$" - "1\25#/\23\36)DKO>>\77\206\377\377\377\6%(*.6=\24!,\25#/\27$0\26%0\203" - "\26$0\6\26$1\26$0\26$1\26$0\26$1\26%2\205\27%2\10\26%2\27%2\26$1\25\"" - ".\21\34&\13\23\31klmjjj\202\373\373\373\222\377\377\377\202\367\367\367" - "\12UUUX[]\16\30\40\23\40+\27$1\26%2\24\"-\21\34%NRVHHH\216\377\377\377" - "\202\304\304\304\4\20\30\37\26#-\27%1\26%2\202\27%2\2\26%2\27%2\202\26" - "%2\202\27%2\202\26%2\1\27%2\202\26%2\1\27%2\202\26%2\2\22\37*\13\23\32" - "\202\273\273\273\204\377\377\377\4\242\242\242\243\243\243\15\25\35\24" - "\40+\202\26&2\3\26%2\27%2\26%2\202\27%2\30\26%1\27%2\26%2\32)5>\77\206\377\377\377\5%(*." 
- "6>\24\"-\26$/\27%2\202\26%2\203\27%2\2\26%2\27%2\202\26%2\2\27&3\26&" - "3\205\27&3\5\26&3\25#/\22\36)\\afWYZ\210\377\377\377\202\341\341\341" - "\202\235\235\235\202\204\204\204\202\263\263\263\202\367\367\367\206" - "\377\377\377\202\362\362\362\10\34$)#.8\25%2\26&3\24!-\17\30!\253\254" - "\255\252\252\252\204\377\377\377\202\373\373\373\202\231\231\231\202" - "\346\346\346\204\377\377\377\10\256\256\256\257\257\257\26\34$\33(2\27" - "%3\26&3\27&3\27&4\202\27&3\10\26%3\27&3\27&4\27%2\26%1\26$0\24#/\24\"" - "0\202\25#/\4\26$0\25%1\22\37*\13\23\32\202\273\273\273\204\377\377\377" - "\4\242\242\242\243\243\243\15\26\35\23!,\202\27&3\3\27%3\26&4\27&4\205" - "\27&3\26&3\77ACDKIDXC\10\250{\0\341\245\0\366\264\0\372\267\0\372\267" - "\1\372\270\2\372\270\4\372\271\6\365\266\10\330\241\10\243{\15jV\40U" - "QI\227\227\225\312\312\311\342\342\342\360\360\360\370\370\370\202\375" - "\375\375\13\372\372\372\363\363\363\354\354\354\274\273\272WB\12\324" - "\233\0\355\257\5{b\36\245\245\245\363\363\363\376\376\376\214\377\377" - "\377\7\376\376\376\365\365\365\342\342\342\324\324\324\236\237\23749" - "=\36,9\203\26&3\203\27&3\6\26%4\27&3\26$1\24!.:CI47:\202\373\373\373" - "\206\377\377\377\202\225\225\225\6""333579\11\15\20\11\15\21),.'()\202" - "\200\200\200\202\362\362\362\206\377\377\377\4kklpsw\22\35'\24#0\202" - "\27&3\6\27&4\27%2\24\40,\16\30\40\264\265\266\263\263\263\206\377\377" - "\377\202\341\341\341\202fff\6!\"#\"%(\12\16\21\11\15\20""579333\202\221" - "\221\221\202\373\373\373\204\377\377\377\12\256\256\256\256\257\257\25" - "\34$\32'2\27%2\26&4\26$0\23\40+EKP>>\77\206\377\377\377\4%(+.7\77\24" - "#/\25%1\202\26&3\202\27&3\1\27%3\202\26&3\10\27&3\27%3\26&3\27'4\27'" - "5\27'4\27&5\26'5\202\27'4\5\27&4\24!-\17\31\"X[]UUU\210\377\377\377\202" - "\341\341\341\6\235\235\235\236\237\237\205\207\210\205\206\207\263\263" - "\264\263\263\263\202\367\367\367\206\377\377\377\202\362\362\362\10\34" - "%+#/8\26&3\27&5\24!.\15\26\36\252\253\253\252\252\252\204\377\377\377" 
- "\202\373\373\373\202\231\231\231\202\346\346\346\204\377\377\377\11\256" - "\256\256\260\261\262\27\"*\34)4\26&4\27&4\27'4\27&4\27&5\202\27'4\15" - "\27'5\26&3\26%1\24\".\23\40+\22\36*\22\36(\22\36)\22\37)\23!,\25\".\22" - "\36)\13\23\32\202\273\273\273\204\377\377\377\11\242\242\242\243\243" - "\243\15\26\36\24!.\27'4\27&4\27'4\27'5\27'4\202\27&5\12\27'5\27&5\27" - "'47AJUTU\253\253\253d`U\204c\7\330\236\0\363\262\0\203\372\267\0\202" - "\372\267\1\26\372\270\3\372\271\5\372\271\7\372\272\11\332\242\11\201" - "`\6^N!xsg\241\241\241\314\314\314\347\347\347\357\357\357\362\362\362" - "\357\357\357\351\351\351mjd\225m\2\350\253\2\324\235\7YVM\327\327\327" - "\373\373\373\216\377\377\377\22\372\372\372\350\350\350\326\326\326\263" - "\263\263]^`%2<\27'4\27&5\27'5\27&4\27'5\27'4\27&4\27'4\25$1\22\36)6<" - "@///\202\373\373\373\206\377\377\377\12\225\225\225\227\230\231;AE=E" - "K\24\36'\24\37'2:B/6<\203\205\207\200\201\201\202\362\362\362\206\377" - "\377\377\12jjjlno\16\27\40\24!,\27%4\27&4\27&3\26$1\21\35'\12\21\26\202" - "\263\263\263\206\377\377\377\202\341\341\341\12hjkknr)18+4=\24\37(\24" - "\37'=EK;AE\223\224\226\221\221\221\202\373\373\373\204\377\377\377\12" - "\256\256\256\260\261\262\27\"+\34*4\27%4\27'5\25%1\23!,EKQ>>\77\206\377" - "\377\377\6%(+/8\77\25#/\26%2\27'5\27'4\203\27&4\1\27'5\202\27&5\3\27" - "'4\27&4\27(6\203\27'6\6\30(6\27'6\30&5\26&3!,5\31\40&\202\356\356\356" - "\206\377\377\377\202{{{\12\11\15\20\15\23\31\14\25\35\16\30\40\16\31" - "!\16\30!\14\26\35\12\22\30\37$'\33\34\35\202\252\252\252\202\377\377" - "\377\202\362\362\362\10<<\205\205\205\300\300\300\330\330\330\343\343" - "\343\335\335\335OI8\277\214\0\363\263\3\230s\16\204\204\204\353\353\353" - "\376\376\376\216\377\377\377\22\373\373\373\355\355\355\330\330\330\305" - "\305\305\201\201\202#.7\27(5\27'6\27(5\27(6\30'6\30(6\27'6\30'5\24!-" - "\16\27\40\300\300\301\277\277\277\204\377\377\377\202\373\373\373\16" - "UUUVWX\12\21\27\16\31\"\22\40+\24\"/\26%2\27%2\25#1\24!-\21\34%\14\25" 
- "\34""369///\202\356\356\356\204\377\377\377\202\356\356\356\10\31\40" - "&!,6\26%2\27&5\26%3\24!.JQVCEG\206\377\377\377\202\331\331\331\6\33\34" - "\34!'*\16\30!\21\35(\24\".\26%1\202\26%3\6\24\"0\23\37*\17\31#\13\23" - "\32""368///\202\346\346\346\202\252\252\252\12\12\17\22\21\32#\23\40" - ",\25$2\27'6\30'6\26&3\23!.EKQ>>\77\206\377\377\377\5%(+/8\77\25#1\26" - "'4\27(6\202\27'6\5\27(6\27(5\27'6\27(6\27'5\202\27(5\12\27(7\30(7\27" - "(8\30(7\27(8\27(7\27'5\24$1\36'/\26\30\33\202\356\356\356\206\377\377" - "\377\16{{|\177\202\204\22\35%\25\",\23\".\24#/\25$0\24#0\24\".\22\40" - ",'1:%,2\253\254\255\252\252\252\202\377\377\377\202\362\362\362\10AD" - "GFNT\23\".\27&5\30(7\27(7\23\"/\15\26\36\202\273\273\273\204\377\377" - "\377\202\263\263\263\14\4\7\11\13\23\32&.5\"'-\335\336\336\335\335\335" - "\232\233\233\234\236\240\23\34%\26$/\26'4\26'7\203\27)7\11\30(7\30(6" - "\26%4\23\".\21\34'\20\31\40\14\22\27\77AC=>>\204^^^\4""99:;=@\5\11\14" - "\2\3\4\202\273\273\273\204\377\377\377\5\242\242\242\243\243\243\15\27" - "\40\24#0\30(7\204\27(7\202\30(7\13\27(7\30(8\33,9\77DH{{{A6\32\230o\2" - "\270\207\0\336\242\0\360\260\0\370\265\0\210\372\267\0\21\372\267\1\372" - "\270\2\372\270\4\371\270\7\362\263\10\340\246\11\226q\15XH\34ibR\250" - "\250\250\212\212\212y]\23\341\245\1\360\261\6UG\37\305\305\305\365\365" - "\365\217\377\377\377\13\375\375\375\361\361\361\334\334\334\316\316\316" - "\232\233\233\",4\31)8\30(8\27(7\27(8\27)7\202\27(7\3\26(5\22\40+\12\21" - "\30\202\277\277\277\204\377\377\377\202\373\373\373\6VXX\\ae\21\35(\24" - "\"/\25%3\26'5\202\27'6\6\27(5\26&4\24#1\23\40,:BH48;\202\356\356\356" - "\204\377\377\377\202\356\356\356\10\26\31\34\36(1\25$1\27'5\25&4\22\37" - "*GLP@@@\206\377\377\377\202\331\331\331\6\"(.'2:\23\"/\25$2\26&5\26'" - "6\202\30'6\6\26&5\25%3\25$0\22\40+:CI6:>\202\346\346\346\6\252\253\253" - "\254\256\257\21\33#\26$/\27&4\27'6\202\27(7\4\26&5\24\".ELQ>>\77\206" - "\377\377\377\5%)+/8@\26$2\26'5\27(8\202\30(7\1\27(8\202\27(7\5\30(7\27" - 
"(7\27)7\27(7\30)9\203\27)9\6\30)8\30)9\26&3\22\37+|\177\202www\206\377" - "\377\377\11wwwwxx\12\22\30\20\34&\24$3\27'6\30*9\27)8\30)8\202\30)9\23" - "\27)7\26&3\23!.\17\31#\11\17\25hjkggh9;<=BF\17\32$\24!.\26'5\30(7\30" - "*9\30)8\24#0\16\30\"\243\243\244\242\242\242\206\377\377\377\24sssw{" - "}\16\30\"\17\32#\16\30\40\15\26\36\15\30!\21\35(\24#0\27'6\30)8\27)9" - "\30)8\30)9\30*8\27(7\26&4\23\40,QY_MQT\202\331\331\331\210\377\377\377" - "\204\331\331\331\204\377\377\377\5\242\242\242\243\243\243\16\30\40\25" - "#0\30)8\202\30)9\3\30)8\30)9\27)9\202\30)9\13\30)8\36-;>CH\217\217\217" - "lh^\\E\6\221j\2\275\212\0\331\237\0\356\257\0\370\266\0\211\372\267\0" - "\20\372\267\1\372\270\2\372\270\3\372\271\5\372\271\7\356\261\11\266" - "\207\10u]\31XH\33\276\214\2\366\265\3\264\205\6e`S\325\325\325\363\363" - "\363\374\374\374\216\377\377\377\20\375\375\375\362\362\362\333\333\333" - "\316\316\316\241\241\241+4=\31*:\30)9\27)9\30)9\30)8\27)9\26(6\25'44" - "=E+/3\206\377\377\377\202\225\225\225\4\12\22\30\22\36*\26&4\26(7\202" - "\27)9\203\30)8\7\27)9\30)8\27(7\24\"/\16\30!\\^`YYY\206\377\377\377\10" - "bbchmq\23!-\25&4\25$2\17\32$\247\250\251\246\246\246\204\377\377\377" - "\202\367\367\367\27\36\36\36&,1\22\36*\24$2\27(8\27)8\27*8\30)9\27)9" - "\30)8\30*8\27)9\30)7\27'6\25$1\22\36*\16\30\40\14\25\35\15\27\37\20\34" - "&\24#/\26'6\30)8\202\27)9\5\30)8\26'6\23#/ELR>>\77\206\377\377\377\5" - "&),/9A\25%3\27)7\27)8\202\30)9\1\27)8\204\30)9\2\27)9\30)8\204\30*:\6" - "\30*9\27*:\25%3\17\33&z{}www\206\377\377\377\6www{~\202\20\35(\25$2\27" - "(8\30)9\206\30*:\13\27)8\26'6\24#0\21\35(nsymrvAHODMU\24\"0\26'5\30)" - "9\203\30*:\4\24%4\17\34'\244\246\247\242\242\242\206\377\377\377\12s" - "ssstt\11\20\27\16\32#\22\40,\23\"/\25$2\26&4\27(7\30)9\202\30*:\10\30" - "*9\30*:\27)9\26'6\23!-\16\30!LNQHHH\202\331\331\331\210\377\377\377\204" - "\331\331\331\204\377\377\377\4\242\242\242\243\243\243\16\30!\24$2\202" - "\30*:\1\27*:\202\30*:\1\30*9\203\30*:\13\37/=HLP\243\244\244\307\307" 
- "\307\214\212\204QJ7{\\\7\251|\1\325\234\0\361\261\0\371\266\0\202\372" - "\267\0\6\371\267\0\364\263\0\361\261\0\363\262\0\370\265\0\371\266\0" - "\203\372\267\0\20\372\267\1\372\270\3\372\270\5\372\271\6\356\260\5\340" - "\244\2\363\262\0\372\271\5\220k\6RM\77\240\237\236\317\317\317\350\350" - "\350\364\364\364\372\372\372\376\376\376\213\377\377\377\10\375\375\375" - "\363\363\363\334\334\334\314\314\314\245\246\2465>G\31+;\27*:\204\30" - "*:\4\27(7\25$21:A)*,\206\377\377\377\5\225\225\225\227\231\232\17\33" - "%\25$2\27)9\203\30*:\1\30*9\204\30*:\5\30*8\26&5\23!-`ejZZZ\206\377\377" - "\377\10bbbfjm\21\36)\26%5\24#1\15\27\40\246\246\247\246\246\246\204\377" - "\377\377\202\367\367\367\6#'*+5>\25%3\27(7\30*:\27*:\207\30*:\12\30*" - "9\27(7\26&5\25$2\24#0\24$1\25&3\27(7\30)9\27*:\203\30*:\4\27(7\23#1E" - "MS>>\77\206\377\377\377\5&),/9B\26&5\27)8\30*:\202\30*9\2\30*:\30):\205" - "\30*:\10\30+;\30+<\30+;\30*<\31*<\31+<\24$2\15\27\40\202\310\310\310" - "\204\377\377\377\202\320\320\320\10\6\13\20\17\34&\26'6\27)9\30+;\30" - "*<\31+;\30+;\203\30+<\13\30+;\30+<\30+;\27*:\26&6\24$1\23#/\24$1\25&" - "5\27)9\30*:\202\30+<\6\30*;\30+<\26'8\23\"/8>D011\202\373\373\373\206" - "\377\377\377\6\200\200\200\204\207\212\20\34'\24$1\27)8\30*:\206\30+" - "<\4\26(8\24#1V\\cPSU\202\373\373\373\222\377\377\377\5\242\242\242\243" - "\243\243\16\31\"\25%3\30+;\202\30+<\202\30*;\204\30+;\26\36/=RVZ\271" - "\271\271\340\340\340\334\334\334\276\276\276nkdgO\16\252}\0\333\240\0" - "\361\261\0\371\266\0\372\267\1\366\264\1\337\244\0\324\233\0\323\233" - "\0\342\245\0\355\256\0\364\263\0\370\266\0\371\267\0\202\372\267\0\202" - "\372\267\1\1\372\270\2\202\372\267\1\14\372\270\4\372\271\11\276\216" - "\11kR\16eZ;\206\205\203\265\265\265\332\332\332\355\355\355\367\367\367" - "\374\374\374\376\376\376\210\377\377\377\7\375\375\375\366\366\366\340" - "\340\340\314\314\314\251\251\252;EN\31,=\203\30+;\6\31+;\30*;\26(7\23" - 
"\"/djn^^^\206\377\377\377\7#$%,4:\24#1\27(8\30*;\30+;\30+<\202\30+;\2" - "\30*;\30+<\202\30+;\5\30+<\27*9\26'6\20\35(\7\14\20\202\341\341\341\204" - "\377\377\377\6\242\242\242\243\245\245\17\33&\25&4\24#1\13\24\33\202" - "\335\335\335\204\377\377\377\202\242\242\242\12\13\24\34\23!/\27*9\30" - "*<\30+<\30+;\30*<\30*;\30*:\27*:\204\27*9\4\27*:\27)9\30*:\27)9\204\27" - "*9\10\27)9\27):\30*;\31+;\30)9\24$3EMT>>\77\206\377\377\377\5&),09B\26" - "'7\27):\30*<\203\30+;\3\30*<\30+;\31+;\202\30+;\11\30*<\30,=\31+<\30" - ",=\31+<\31,<\30+=\24#1\14\25\34\202\310\310\310\204\377\377\377\202\320" - "\320\320\4\13\24\34\23\"/\27*;\30*;\202\30,=\3\30+<\31+=\30+<\202\30" - ",=\1\30,<\202\30,=\20\30+<\27+;\30)9\27)9\27*:\30*;\30+;\30,=\31+<\31" - "+=\31,=\30+<\27);\26'5" - "Z^a\310\310\310\346\346\346\340\340\340\333\333\333\275\275\275UN<}\\" - "\2\267\206\0\333\240\0\364\262\0\372\270\2\335\243\3fO\15yY\2\237u\1" - "\270\207\0\312\224\0\333\240\0\351\253\0\362\261\0\367\265\0\206\372" - "\267\0\16\372\270\2\372\270\4\372\271\7\362\264\10\316\231\10\227s\20" - "aS*_]X\250\250\250\330\330\330\350\350\350\365\365\365\373\373\373\376" - "\376\376\206\377\377\377\20\375\375\375\370\370\370\346\346\346\317\317" - "\317\255\255\255>@\206\377\377\377\6&)" - ",/:D\26(7\30*;\30+<\30+=\202\30,=\10\31,<\31,=\31+=\31,<\30,=\30+=\30" - "->\31,>\202\31-\77\4\31->\30->\24#2\12\22\31\202\362\362\362\204\377" - "\377\377\16\200\200\200\201\203\204\17\33&\26'6\30->\31->\31-\77\31," - ">\30,=\27*;\27(9\25'6\25'5\25&5\211\25'5\13\25&5\26&6\25(7\26):\30+<" - "\30,=\30*;\23#2\15\30\"uvwsss\210\377\377\377\14\242\242\242\244\246" - "\247\22\36'\30&4\27*:\30,<\30,>\31->\30,=\27+<\33+7\24\35%\202\346\346" - "\346\206\377\377\377\202\314\314\314\202\210\210\210\202\277\277\277" - "\210\377\377\377\6\242\242\242\243\243\244\16\31$\25'6\30->\30,>\202" - "\30->\3\30,>\31,>\30->\202\31->\32\35/\77adg\323\323\323\356\356\356" - "\350\350\350\341\341\341\333\333\333\253\253\253]T:jP\11\260\201\0\340" - 
"\244\0\372\270\4\323\234\7MH7\220\220\215YVN^M\40~^\6\231q\1\273\211" - "\0\315\226\0\335\242\0\354\254\0\363\262\0\367\265\0\204\372\267\0\20" - "\372\267\1\372\270\2\372\270\3\372\270\5\370\270\7\363\265\10\314\230" - "\12nT\15ZO1\203\200y\264\264\264\330\330\330\353\353\353\366\366\366" - "\373\373\373\376\376\376\203\377\377\377\20\375\375\375\371\371\371\352" - "\352\352\321\321\321\257\257\257;FQ\30->\31,>\30,>\31,>\30,>\30,\77\26" - "(8\21\37+\203\206\210\200\200\200\204\377\377\377\202\352\352\352\4\10" - "\16\24\23!.\30+<\31+=\202\31->\202\30,>\202\31->\2\30,>\31->\204\31," - ">\2\25%4\15\26\37\202\256\256\256\204\377\377\377\202\273\273\273\4\15" - "\27!\25&4\24#2\11\20\27\206\377\377\377\20fffjmp\21\36*\26(7\31,>\31" - "->\30,>\31,>\27*=\26(62\77K0>@\206\377\377\377\4&),0:D\26)" - "8\30,<\202\31->\14\30->\30,>\31->\30-\77\30,>\31,>\30,>\31->\30-@\31" - "-@\31-\77\30.@\202\31-@\2\23$2\11\21\30\202\362\362\362\204\377\377\377" - "\5\200\200\200\203\205\207\21\37+\26(9\31.@\202\31-@\5\31.@\30,<\25'" - "6\21\40.\20\34(\202\16\32$\202\16\32%\3\16\32$\16\33%\16\32$\202\16\33" - "$\15\16\33%\16\32$\16\32%\17\34'\21\40,\24%3\26(9\30,=\30,>\26(9\22!" 
- "/x}\201tuu\210\377\377\377\202\242\242\242\12\12\17\24\22\35&\23$4\27" - "*;\31,\77\31.\77\30-=\27):\30&2\20\26\31\202\346\346\346\206\377\377" - "\377\202\314\314\314\202\211\211\212\202\277\277\277\210\377\377\377" - "\7\242\242\242\243\243\244\16\32$\25'7\31.\77\31-\77\31-@\202\31.@\40" - "\30-@\31-@\31-\77\31.@\34""0A_be\322\322\322\364\364\364\356\356\356" - "\347\347\347\340\340\340\334\334\334\270\270\270UN<}\\\3\305\220\0\372" - "\270\3\316\231\10SN\77\320\320\320\331\331\331\253\253\253}{udZ\77]F" - "\7\213f\2\260\201\0\305\220\0\327\236\0\350\252\0\365\264\0\371\267\0" - "\205\372\267\0\16\372\267\1\372\270\3\372\270\4\372\271\6\367\267\10" - "\325\236\10\225o\6m[)eaX\222\222\222\322\322\322\346\346\346\363\363" - "\363\373\373\373\202\376\376\376\20\375\375\375\371\371\371\351\351\351" - "\321\321\321\250\251\2527DO\31-@\31.@\31-\77\31.@\31-@\31-\77\26)9\20" - "\37+\203\205\207\200\200\200\204\377\377\377\202\352\352\352\16\11\21" - "\30\24#1\31,\77\31-\77\31.@\31-\77\30-@\31.@\30.@\31-\77\30-@\31-@\31" - ".\77\30-@\202\31-\77\2\24'5\15\31\"\202\256\256\256\204\377\377\377\202" - "\273\273\273\4\15\27!\24'6\24#1\11\20\26\206\377\377\377\14fffjnr\22" - "\40-\27)9\31-@\31-\77\31.@\31-\77\27*9\22!.,4;(-2\202&*,\204&),\1&*," - "\202&),\203&*,\10',0*28\27$0\34,9\26(8\24%4FNU>>@\206\377\377\377\11" - "&),/:E\26*9\30,>\31.@\31-@\31.@\31-@\31.\77\202\31.@\3\31-@\31.@\31-" - "\77\203\31.A\203\31/A\2\24$3\11\21\27\206\377\377\377\6^^^cgk\22\"/\27" - "*;\31.A\31/A\202\31.A\4\26):\21\40-\246\251\253\243\245\246\214\243\243" - "\244\5\243\245\246\245\247\2512=E6EQ\27+<\202\30-\77\5\27);\22\40.\13" - "\25\35ttusss\210\377\377\377\12\277\277\277\300\301\302\31%.\37/<\27" - ",=\30-\77\30+=\24&5ahnZ[\\\206\377\377\377\202\221\221\221\2\6\13\17" - "\13\24\35\202\16\33%\2\14\26\37\7\16\23\202www\206\377\377\377\5\242" - "\242\242\243\243\244\16\33%\25(8\31/A\203\31.A%\31/A\31/@\32.A\31.A\31" - "/A\33/AUZ_\312\312\312\371\371\371\365\365\365\356\356\356\346\346\346" - 
"\340\340\340\334\334\334\247\247\245[L$\236t\1\351\253\2\261\203\5gc" - "X\321\321\321\342\342\342\340\340\340\332\332\332\313\313\313\246\245" - "\242faUPD!y\\\13\240u\1\311\223\0\355\255\0\366\264\0\363\262\0\362\261" - "\0\365\264\0\370\266\0\203\372\267\0\25\372\267\1\372\270\2\372\270\3" - "\372\271\5\370\270\7\346\253\10\306\224\12\202e\24QH0\206\203|\300\300" - "\277\336\336\336\360\360\360\367\367\367\372\372\372\367\367\367\345" - "\345\345\321\321\321\206\214\221&8G\31/A\204\31.A\5\32.A\26):\21\37," - "\203\205\207\200\200\200\204\377\377\377\202\341\341\341\6\12\22\31\24" - "$3\31/@\31.A\31/A\32.A\212\31.A\4\25'7\16\32$\246\246\247\246\246\246" - "\204\377\377\377\202\273\273\273\4\15\30\"\25'7\24$3\11\20\26\206\377" - "\377\377\6fffknr\22!/\27*;\32/@\32.A\202\31/A\2\25'7\15\31\"\220\377" - "\377\377\6CHMHS]\25'7\24%5FNV>>@\206\377\377\377\6&*,0\27,=\23#1_dhYYY\206\377\377\377\4\221\221" - "\221\224\226\230\17\34'\24$3\202\25(9\4\24%4\20\36+{\177\203www\206\377" - "\377\377\6\242\242\242\243\243\244\17\33&\26(9\31/B\32""0B\202\31/B(" - "\32""0B\31""0C\31/B\31/C\31/B\33""0CFOW\276\276\276\374\374\374\372\372" - "\372\365\365\365\355\355\355\346\346\346\341\341\341\327\327\327xwu^" - "H\16\244x\1\212f\1|xn\337\337\337\353\353\353\352\352\352\346\346\346" - "\342\342\342\337\337\337\334\334\334\304\304\304xxwH<\34\254~\1\354\255" - "\0\357\257\0\335\241\0\322\232\0\331\240\0\350\252\0\362\261\0\367\265" - "\0\371\267\0\203\372\267\0\13\371\266\1\370\266\2\372\267\3\372\271\6" - "\372\271\10\334\243\11yZ\5dU)zta\240\240\240\315\315\315\202\350\350" - "\350\16\331\331\331\312\312\312`kt\33""1D\33""0D\32/C\31""0B\31/C\31" - "/B\32/B\26+;\21\40.\203\206\210\200\200\200\204\377\377\377\202\341\341" - "\341\10\11\21\30\23$3\31.A\32/B\31/B\31""0C\31""0B\32""0B\203\31/B\3" - "\31""0B\32/C\31/B\202\32/B\2\25(8\16\31#\202\246\246\246\204\377\377" - "\377\202\273\273\273\4\16\31#\25(8\24&4\11\21\30\206\377\377\377\5ff" - 
"fjmp\21\40-\26*;\32/C\203\31/B\2\25&5\12\23\32\220\377\377\377\6\77B" - "DGQY\24&6\25&6FNV>\77@\206\377\377\377\4&*-0\77@\206\377\377\377\10&*-0\31.B\31""1D\32""0D\31""0C\32""0D" - "\202\31""0D\2\31""0C\31""0D\202\32""0D\1\32""1F\202\32""1E\5\31""1F\32" - "1F\32""0E\25'7\13\24\34\202\362\362\362\204\377\377\377\12\200\200\200" - "\201\203\204\20\36+\27+<\31""1E\32""1E\32""0E\32""1E\24'7\11\22\32\220" - "\377\377\377\14\77ADGQ[\27+<\31/B\31""1E\32""1E\32""0D\31/C\27+=\23$" - "3ahn\\^a\202\367\367\367\206\377\377\377\202\242\242\242\6\14\27\40\23" - "&5\25)9\20\36*\223\224\226\221\221\221\204\377\377\377\202\356\356\356" - "\4\22\30\37\33+8\31.B\31""0E\202\32""1E\4\32""0D\31/D\24&5\13\25\36\202" - "\320\320\320\204\377\377\377\6\242\242\242\243\243\244\17\34(\26*<\32" - "1E\32""1F\203\32""1E\17\32""1F\32""1E\32""1F\32""0E\32""1E.>K\225\225" - "\225\374\374\374\376\376\376\375\375\375\371\371\371\364\364\364\360" - "\360\360\357\357\357\361\361\361\202\366\366\366\2\216\214\207\337\337" - "\336\202\373\373\373'\372\372\372\367\367\367\363\363\363\354\354\354" - "\345\345\345\340\340\340b[H\230p\1\351\253\1\344\250\5lX!\235\235\235" - "lkiNG3lS\16\214g\2\256\200\0\307\221\0\332\240\0\354\255\0\354\255\1" - "\260\201\1\232q\0\251|\0\275\212\0\317\227\0\321\231\1\341\244\1\357" - "\257\2\363\262\4\310\224\5\247{\6|]\5VH\37oj]\226\226\226vy|\40""0>\33" - "2F\202\31""1F\7\32""1F\32""1E\32""1F\30-@\24&6mrwfff\206\377\377\377" - "\4\30\33\34\".8\27*=\31/B\204\32""1E\2\32""0E\31""1E\204\32""1E\4\31" - "0E\30.B\22#1\11\20\27\202\325\325\325\204\377\377\377\6\252\252\252\253" - "\254\255\20\36*\27*=\25(9\14\27\40\202\335\335\335\204\377\377\377\202" - "\242\242\242\4\14\27!\25'7\31""0C\32""1D\202\32""1E\2\24'7\11\22\31\220" - "\377\377\377\6:=@DNW\26):\25(8FOX>\77@\206\377\377\377\16&*-0=H\30-\77" - "\31.D\31""1E\32""1E\31""1F\31""1E\32""1E\32""0E\32""1F\32""1E\32""1F" - "\31""1E\204\32""2G\4\32""1G\32""2G\25):\14\27!\202\310\310\310\204\377" - 
"\377\377\202\320\320\320\4\14\27!\24&7\31""0E\32""1F\202\32""2G\2\25" - "'9\12\23\33\220\377\377\377\16)-04@K\27,\77\31""0D\32""2G\32""1F\32""0" - "D\31""0D\30/A\27,\77\24&5\17\34(ADH<<<\202\352\352\352\206\377\377\377" - "\6<@DDOZ\24%5\17\34'\243\244\245\242\242\242\204\377\377\377\202\273" - "\273\273\5\14\26\37\25(9\32""2E\32""1F\32""2G\202\32""2F\3\32""2G\26" - "*<\16\32%\202\242\242\242\204\377\377\377\6\242\242\242\243\243\244\17" - "\35)\26+=\33""2F\32""2G\202\32""2F\4\33""2G\32""1G\32""2G\32""1G\202" - "\32""2G\15(;J\202\202\203\372\372\372\377\377\377\376\376\376\374\374" - "\374\371\371\371\366\366\366\365\365\365\366\366\366\371\371\371\374" - "\374\374\372\372\372\203\376\376\376(\375\375\375\373\373\373\367\367" - "\367\357\357\357\350\350\350\266\266\266WC\15\310\223\0\364\264\3\302" - "\220\6g`L\311\311\311\333\333\333\301\301\301\220\220\216haMVD\24|[\3" - "\255~\0\305\220\0\345\250\0\345\250\1\255\200\3]D\2I8\11jO\5\210d\2\203" - "`\2rU\3\273\211\1\355\256\1\344\250\4\244y\4\243x\4]F\7_Q+OLC\40-8\34" - "4I\33""3H\202\32""2G\6\32""1F\32""2G\32/B\26*<7AK-/1\206\377\377\377" - "\13www{\200\203\22#2\27,\77\32""1F\32""2G\32""2F\32""1F\32""1G\32""2" - "G\32""1G\202\32""2G\5\32""2F\30/B\26);JS\\BDF\206\377\377\377\10ooos" - "vy\22\"0\27-\77\26+<\16\33&\246\246\247\246\246\246\204\377\377\377\202" - "\367\367\367\10$).-:E\27->\31""0D\32""2F\32""2G\25):\14\26!\210\335\335" - "\335\202\367\367\367\204\377\377\377\202\373\373\373\6\24\31\36\36-;" - "\27,\77\25):FPX>\77@\206\377\377\377\11&*-0=I\30-A\31""0E\32""1F\32""2" - "G\32""2F\32""1G\32""2G\202\32""2F\13\32""1F\32""2G\32""2F\33""2I\33""2" - "H\32""2H\33""3H\32""3H\33""3H\26+=\16\33'\202\310\310\310\204\377\377" - "\377\202\320\320\320\10\7\16\24\21!/\27.A\31""0G\32""3H\32""2H\25*;\15" - "\30\"\220\377\377\377\16+054CP\30.C\32""2G\32""1G\31""1F\30/C\27.@\30" - ",\77\30-\77\26+=\25(8GQ[AFK\202\352\352\352\206\377\377\377\6""99;AK" - "R\22#1\16\33&\243\244\245\242\242\242\204\377\377\377\202\273\273\273" - 
"\3\14\27\40\25):\32""3G\202\32""2H\1\33""2H\202\32""3H\2\26*=\16\33&" - "\202\242\242\242\204\377\377\377\10\242\242\242\243\243\244\17\35*\27" - ",\77\33""3H\32""3H\32""2I\33""2I\205\32""3H\10\33""3I\"8K\\_c\342\342" - "\342\376\376\376\377\377\377\376\376\376\374\374\374\203\373\373\373" - "\2\374\374\374\375\375\375\204\377\377\377(\376\376\376\374\374\374\367" - "\367\367\357\357\357\341\341\341^\\V\246z\1\344\247\0\367\267\5\224q" - "\20~~}\331\331\331\344\344\344\341\341\341\336\336\336\322\322\322\272" - "\272\271zwoPF+rW\20\237u\2\306\221\1\352\254\1\330\237\3iN\4J@%\\VDV" - "J)rW\15YC\4\177^\3\303\217\1\204a\2YA\2\205d\11YA\2I7\7$03\35""5I\33" - "4I\203\32""3H\5\32""2H\32""1F\30-B9FQ048\206\377\377\377\6wwwxyz\16\32" - "&\24(9\31""0E\32""2G\202\32""2H\1\33""3H\203\32""3H\6\33""3H\32""2F\27" - ",>\22#1FKP@@@\206\377\377\377\10ooouz\177\25'7\30/C\30-\77\21\40.\247" - "\251\252\246\246\246\204\377\377\377\202\367\367\367\11\36\36\36(18\24" - "'6\30.A\32""2G\32""3H\30,\77\20\37,\336\336\337\207\335\335\335\202\367" - "\367\367\204\377\377\377\202\373\373\373\6\26\35%\40""1@\31/B\26*\77@\206\377\377\377\10&+.1>I\31.B\32""1F\33""2H\32""3H\33""2H\32""2" - "H\202\32""3H\6\33""2H\33""3H\33""2H\32""3H\32""4I\32""3I\202\33""4J\6" - "\32""3J\33""4J\27-@\21\"0z|~www\206\377\377\377\12ooouz\177\23%4\27-" - "@\32""2H\33""3I\27.@\21!/cgk__`\204^^^\202bbb\202\362\362\362\204\377" - "\377\377\202\341\341\341\20\12\22\33\24'7\31""1E\32""1F\30-A\26);5ER" - "4ALHT^JWb\26)<\30-A\25);\20\40-AFJ<<<\206\377\377\377\6sssx}\202\21\40" - "-\16\33'\227\230\231\225\225\225\204\377\377\377\202\346\346\346\12\15" - "\27\37\27*:\32""1F\33""3I\33""4I\33""3J\33""3I\31""3H\25(:\14\27!\202" - "\314\314\314\204\377\377\377\7\242\242\242\243\243\244\17\35*\26-\77" - "\33""4I\32""4J\33""3J\203\32""4I\12\33""3I\33""3J\32""4I\33""3I\35""4" - "J8BL\261\262\262\375\375\375\376\376\376\377\377\377\205\376\376\376" - "\205\377\377\377'\376\376\376\373\373\373\365\365\365\355\355\355\250" - 
"\250\250gS\34\313\225\0\362\261\2\363\264\7[N*\264\264\264\351\351\351" - "\356\356\356\352\352\352\347\347\347\343\343\343\341\341\341\340\340" - "\340\314\314\314\222\222\222jdTdR!\200_\5\311\223\1\353\255\2\327\236" - "\3\221l\7lZ(96/\\P.qV\15\207c\2\240v\0kN\1F<\36YK'>3\17\"18\34""4J\202" - "\32""4I\7\32""3J\33""3I\32""3J\33""3I\31""1F\24'8\12\25\35\202\320\320" - "\320\204\377\377\377\202\367\367\367\10@DGGPY\25(9\27-A\32""1F\31""3" - "H\33""4I\32""4J\202\32""2H\4\31/D\27+>*8E#+2\202\335\335\335\204\377" - "\377\377\202\373\373\373\10\"$'*7B\27,@\32""2F\31""0D\24'7OV[HHH\206" - "\377\377\377\20\314\314\314\314\314\315\34&0#3B\30-@\31""0D\30/B\25)" - ":\20\40-\15\31#\13\25\36\13\24\35\11\21\30\6\14\22""000///\202\373\373" - "\373\204\377\377\377\202\267\267\267\6\15\31$\25*<\31""1E\27,>GQY>\77" - "A\206\377\377\377\11%),/=G\27-@\31""0D\32""1E\32""1F\32""1G\32""2G\32" - "1H\202\32""3I\4\33""3J\33""3I\33""4J\32""5K\204\33""5K\5\33""4K\31""1" - "F\24'8|\201\205www\206\377\377\377\16ooopqq\14\31#\23%6\30/D\32""3I\31" - "1F\25);hrzfmsdkpdjpdimbfi\202bbb\202\362\362\362\204\377\377\377\202" - "\341\341\341\20\15\31#\26*<\31""1F\30-B\23&5\16\34(.4;+/4ADHDKR\21!0" - "\26,>\30/B\26*@A\206\377\377\377\6sssw{\177\20\37,\17\35*\227\232" - "\233\225\225\225\204\377\377\377\202\346\346\346\4\12\17\24\25$1\30." 
- "C\32""3H\202\33""4K\4\32""3I\31""1E\22$3\10\20\27\202\314\314\314\204" - "\377\377\377\4\242\242\242\243\243\244\20\37+\27-@\202\33""4K\3\33""5" - "K\33""4K\33""5K\202\33""4K\203\33""5K\5\33""4K*K\31""0E\32""4I\32""1G\27-@R\\eKMP\206\377\377\377\202\314\314" - "\314\16\23\26\30\33$,\22\"1\25)<\30/C\30""0C\27-A\26+=\25):\23&6\17\36" - "+\12\25\36""124///\202\373\373\373\204\377\377\377\10\267\267\267\270" - "\271\272\20\40-\27.A\32""1G\27,\77GQZ>\77A\206\377\377\377\4%&(.9C\24" - "(9\26*=\202\27,\77\14\27,@\30.B\30/D\32""2G\32""3I\32""4J\33""5K\32""4" - "K\33""5L\33""6M\33""6L\33""5L\202\33""5M\4\32""3I\27.B'5B\36\"&\202\356" - "\356\356\204\377\377\377\202\373\373\373\14pqquz\177\22#2\25*<\27,A\26" - ",>\24'9\23%4\21#2\21!/\22\37)\15\26\36\202\225\225\225\206\377\377\377" - "\10\200\200\200\202\204\205\20!.\26,\77\37""4G\32,:\211\215\221\205\206" - "\207\202\373\373\373\202\362\362\362\6-4:4AN\25*<\26*=\23#/\11\15\21" - "\202\352\352\352\204\377\377\377\6\200\200\200\203\207\212\20\40.\21" - "!0din^^^\206\377\377\377\4www|\200\204\22#2\26+>\202\30/D\4\26,\77\23" - "&7agmZZZ\206\377\377\377\11\242\242\242\243\243\244\20\37,\27.B\33""6" - "L\34""5L\33""6M\33""6L\33""6M\203\33""5M\10\33""6L\33""5M\33""5L\":N" - "HPV\310\310\311\371\371\371\376\376\376\211\377\377\377\25\376\376\376" - "\373\373\373\364\364\364\355\355\355\262\261\257fQ\27\273\212\0\362\262" - "\1\372\271\7z\\\11\235\235\235\356\356\356\375\375\375\374\374\374\372" - "\372\372\370\370\370\366\366\366\363\363\363\360\360\360\355\355\355" - "\353\353\353\202\351\351\351\17\350\350\350\336\336\336\231\231\231b" - "]Pt[\31\242v\2\336\243\2\342\247\4\203f\27meQUL3<1\24C2\1sX\12#29\202" - "\33""6M\3\33""5L\33""6L\34""5M\202\33""5L\6\33""6L\33""5M\31""1F\24'" - "8GMS@@@\206\377\377\377\202\373\373\373\12\200\201\201\204\210\213#0" - "<&6D\25):\25*;\33.>\32)7emradg\202\346\346\346\206\377\377\377\12\214" - "\214\214\214\215\216\16\34)\26+>\33""4K\33""6K\32""4K\31""3G\24'8\13" - 
"\25\36\202\273\273\273\206\377\377\377\202\335\335\335\12Y^a^el\31+8" - "\34/>\26)<\25*<\33.\77\32*7elradf\202\352\352\352\206\377\377\377\10" - "<<\77A\206\377\377\377\4UUUY^aENVFQY" - "\202GQZ\11GS\\IU`,=L/BS\31""1G\32""5J\33""5L\33""5M\33""7N\202\33""6" - "N\7\34""6N\33""7N\34""7N\33""5L\31""2H+\77A\206" - "\377\377\377\202UUU\2=>>>>@\202>\77A\14\77ADAGK&2=+\32""4K\33""6N\32""5K\30.CGQ[>\77A\216\377\377\377\10aeggpy\26-@\32" - "4J\34""7O\34""8P\34""8Q\34""9Q\204\34""8Q\202\34""8P\4\32""3J\26+\77" - "nu|ghj\210\377\377\377\202\325\325\325\202\204\204\204\202^^^\202\204" - "\204\204\202\331\331\331\206\377\377\377\202\335\335\335\6\32'2!6H\31" - "2H\27/Dlu~ehj\206\377\377\377\202\352\352\352\202www\202\273\273\273" - "\206\377\377\377\6VXZ^hp\27.B\31""2H%8J\34'2\202\352\352\352\206\377" - "\377\377\202\256\256\256\202jjj\202\242\242\242\202\373\373\373\206\377" - "\377\377\14\242\242\242\243\243\244\20\40/\30""1F\34""8Q\34""9Q\34""8" - "Q\33""8Q\34""8Q\33""8Q\34""8Q\33""8Q\204\34""8Q\12\36""9R+:Gxz{\355\355" - "\355\374\374\374\313\313\313\212\210\204\315\315\315\357\357\357\374" - "\374\374\202\377\377\377\15\375\375\375\371\371\371\361\361\361\351\351" - "\351}}}\202a\5\321\231\0\364\263\1\360\262\7x]\22\247\246\245\363\363" - "\363\376\376\376\207\377\377\377\7\376\376\376\375\375\375\373\373\373" - "\371\371\371\367\367\367\364\364\364\362\362\362\202\360\360\360\12\357" - "\357\357\355\355\355\345\345\345\302\302\302\222\221\216cW9B4\15#*&\40" - ":P\35""9R\202\34""8Q\15\33""8Q\34""8Q\33""8Q\34""8Q\34""8P\34""9Q\34" - "8Q\34""8P\33""7O\31""1G\24(9\211\216\222\204\204\204\212\377\377\377" - "\202\341\341\341\202\373\373\373\210\377\377\377\7\304\304\304\305\306" - "\307\27&2\36""3G\32""5M\33""7O\33""8Q\203\34""8Q\4\32""6M\31""3J,=M%" - "/9\202\346\346\346\210\377\377\377\202\373\373\373\202\335\335\335\202" - "\373\373\373\210\377\377\377\12\204\204\204\211\216\222\24(9\31""1G\34" - "7P\34""8Q\32""5M\27/DGR\\>\77A\216\377\377\377\4^__fms\26+\77\32""4K" - 
"\202\34""8Q\203\34""9R\3\34""9S\34""9R\34""9S\202\34""9R\6\34""7O\31" - "4K\24);\15\31$\204\204\205\204\204\204\224\377\377\377\202\346\346\346" - "\10''(/9A\25*=\31""3J\33""7N\31""4J\27):\15\27\37\202\314\314\314\216" - "\377\377\377\202\341\341\341\10\21\31\40\33.>\32""4L\33""7O\31""2H\22" - "%6V[_QQQ\202\373\373\373\222\377\377\377\7\242\242\242\243\243\244\21" - "!/\30""1F\34""9R\34""9S\35""9R\203\34""9S\202\34""9R\34\34""9S\34""9" - "R\34""9S\34""9R\35""9R#=S8AI\255\255\256\264\264\263n^0\212h\13f`Q\305" - "\305\305\363\363\363\375\375\375\376\376\376\374\374\374\366\366\366" - "\356\356\356\277\277\277[P1\271\210\1\346\250\0\371\267\3\324\235\7l" - "_;\310\310\310\371\371\371\212\377\377\377\17\376\376\376\375\375\375" - "\374\374\374\372\372\372\367\367\367\365\365\365\362\362\362\356\356" - "\356\350\350\350\334\334\334\314\314\314\273\273\273\210\210\210.0.\40" - "6H\202\35""9R\2\34""9R\34""9S\205\34""9R\2\35""9S\34""9S\202\34""9R\6" - "\34""7O\31""3J\23'8\14\30\"xxywww\202\373\373\373\220\377\377\377\202" - "\256\256\256\20\23\30\35\32)5\26.B\32""5L\35""9Q\34""9R\34""9S\34""9" - "R\34""9S\34""9R\35""9Q\34""8P\32""2I\24)<(2;\40\"#\202\314\314\314\222" - "\377\377\377\202\204\204\204\12\13\27!\23&8\31""3J\33""7O\34""9S\35""9" - "S\34""6N\30""0FGS]>AB\216\377\377\377\4^^^elr\25+>\33""5L\202\34""9R" - "\2\34:T\34""9T\202\34:T\202\35:T\10\34""9T\35:T\34""9S\33""8Q\31""2H" - "\23(:\211\216\222\204\205\205\224\377\377\377\202\346\346\346\12/8@5" - "FU\30""3I\33""7P\34""8S\33""8Q\34""2G\24#1\315\315\316\314\314\314\216" - "\377\377\377\202\341\341\341\10\27$1\37""5H\34""8Q\34""9S\33""6N\27/" - "D[enTY[\202\373\373\373\222\377\377\377\7\242\242\242\243\243\244\21" - "!0\30""2H\34:T\35:T\34:T\202\35:T\4\34:T\35:T\34:S\35:T\204\34:T\27\35" - ":T&>RGNSNLI\237u\4\356\257\4\201`\6wtm\334\334\334\370\370\370\375\375" - "\375\371\371\371\362\362\362\342\342\342\177|t\205c\7\324\233\0\363\262" - "\1\372\271\5\253\201\13vur\346\346\346\375\375\375\214\377\377\377\17" - 
"\376\376\376\374\374\374\373\373\373\367\367\367\361\361\361\351\351" - "\351\337\337\337\320\320\320\303\303\303\246\246\246CFH!5G\35;U\34""9" - "S\36;T\206\34:T\1\35:T\203\34:T\7\35""9T\34""9S\33""8Q\31""2H\23(:}\203" - "\211yz|\202\373\373\373\220\377\377\377\11\256\256\256\260\262\264\33" - "+9!7J\32""5M\33""7Q\34""9T\35:T\34:S\203\35:T\10\34:T\34:R\34""7P\31" - "3J/AQ)5\77\315\315\316\314\314\314\222\377\377\377\6\204\205\206\211" - "\216\222\23'8\30""1G\34""7P\34""9S\202\34:T\4\34""7P\30""2HHT`\77CF\216" - "\377\377\377\7___fnu\26,A\32""6N\35:T\34:T\34;U\202\35;U\202\35;V\202" - "\35;U\11\35:U\35;V\34:U\33""9R\32""4L\24);\16\33(TVYQQQ\202\331\331\331" - "\216\377\377\377\202\273\273\273\14!\"%(2<\24)<\31""3J\34""9S\35:T\34" - ";U\35:V\33""6O\27.B'3>\36!$\202\310\310\310\212\377\377\377\202\341\341" - "\341\4""777\77GO\24*=\32""5L\202\34:U\6\34:T\33""7P\26-B\20\40/TY\\Q" - "QQ\202\352\352\352\210\377\377\377\202\356\356\356\202\346\346\346\204" - "\377\377\377'\242\242\242\243\245\247\22$4\30""3J\35;U\35:U\34;U\35:" - "U\34;U\34;V\35;V\34:U\34;V\35;V\35;U\34;U\35;V\35:U\36;V':L/36pU\13\342" - "\246\2\345\252\11OB\40\247\247\247\352\352\352\372\372\372\366\366\366" - "\356\356\356\311\311\311\\M%\257\200\1\344\247\0\371\267\3\372\271\7" - "r[\34\242\242\242\361\361\361\214\377\377\377\202\376\376\376\15\374" - "\374\374\372\372\372\365\365\365\354\354\354\341\341\341\322\322\322" - "\305\305\305\257\257\257abc\35,:\36M[6>D\216\273\273\273\17LQTS^h\27/F\33" - "7P\34;U\34;V\35W\35\31""3J\33""9R\34:T\202\35\31""4K\34""9U\35;W\3537;>AD>AB>AD@DI/9A4DQ\27""1G\33""9R\35Y)" - "BX*,#\251|\0\351\254\3\356\261\12lW\35\215\215\215\333\333\333\354\354" - "\354\277\277\277^M\40\274\212\0\343\246\0\371\267\1\372\270\6\241x\5" - "}}}\352\352\352\375\375\375\210\377\377\377\202\376\376\376\16\375\375" - "\375\374\374\374\371\371\371\366\366\366\357\357\357\350\350\350\337" - "\337\337\321\321\321\305\305\305\261\261\261eef\",4\">V\35\21#3\16\35+\13\30#\12\23" - 
"\34(/5'-2\202',1\13#)/%,2\12\24\35\14\31%\20\37.\22&8\27.D\31""4M\34" - "9T\35Z\7\35=Y\35>Z\36=Z\35>Z\35=Z\35>Y\36=Z\203\35" - ">Z\13\36=Y\35O]\27" - "/E\31""5M\33:T\35;V\36=Y\35>Z\36=Z\35=Z\35>Z\35=Z\35>Z\36>Y\34Z\36>Z\202" - "\35=Z\32\35>Y\34N]=LZHVaGU`HUaIXd6IY:Ob\33""8Q\35" - ";W\202\35=Z\202\35=Y\206\35=Z\4\35>Y\36=Z\36>Y\35=Z\202\35>Z\23\40@[" - "*6\77\216i\2\331\237\0\370\267\7\276\216\14YO4\254\254\254\325\325\325" - "iga\232r\5\327\236\0\361\261\0\372\270\3\357\261\7SA\16\313\313\313\364" - "\364\364\376\376\376\204\377\377\377\204\376\376\376\24\375\375\375\373" - "\373\373\371\371\371\366\366\366\361\361\361\353\353\353\342\342\342" - "\331\331\331\321\321\321\303\303\303\244\244\244Z\\\\\36*4#\77W\36\77" - "Y\35=Z\36>Z\35>Y\36=Z\36>Z\202\35=Z\13\36=Z\35=Z\36=Y\36=Z\35>Z\35=Z" - "\36>Z\36=Z\35=Y\35>Z\35>Y\202\36=Z\33\36>Y\35=Y\34;W\33""9T\32""7P\31" - "4K\27""1H\27/D-BS+\77P2DT2DS2DT3FW\26.C\30""0F\30""3K\31""5N\32""8R\33" - ":T\35Y\36=Z\35>Z\36>Z\35>Z\36=Y\202\36=Z\23\35>Z\36>Y\35=Z\35>" - "Z\35=Z\35>Y\35=Y\35>Y\36=Z\35Z\36=Z\35>Y\36>Z\203\35=Y\12\35>Z\35=Y\34;V\34""9S\31""6O\31" - "5M\30""4L\30""3L\30""3K\31""4L\202\30""4L\1\31""4K\202\30""4L\202\31" - "4L\11\31""6N\32""8Q\34:U\35=X\35>X\35>Z\35>Y\36\77\\\36\77[\202\35>[" - "\203\36>[\32\36\77[\36>\\\35>[\36>[\36\77\\\35\77[\36\77[\35>[\36>[\35" - "=Y\33\26" - ".D\31""4L\33""8R\34=W\202\36>[\1\36\77[\203\35>[\202\36>[\24\36\77\\" - "\36>[\35\77[\36>[\36\77[\34[\202\36\77" - "[\1\35\77[\202\35>[\202\36>[\33\35=[\35;W\32""9S\31""4M\27""0F\25,A\24" - "+>\24*>\25,@\26.E\30""2J\32""6P\33""8R\32""8S\32""6P\31""5N\31""5M\32" - "6M\32""6O\33""7Q\34;U\34=X\35=Z\36>\\\36\77[\36\77\\\35>[\202\36>[\2" - "\36\77[\36>[\202\35>[\30\36>\\\35\77[\36\77[\35\77[\36\77[\36>\\\35\77" - "[,CUiP\11\277\214\0\364\263\3\367\267\12\233t\10VSJ\211\210\207oW\27" - "\307\221\0\350\252\0\370\266\1\372\270\5\275\214\6f^G\343\343\343\372" - "\372\372\203\377\377\377\1\376\376\376\202\375\375\375\25\374\374\374" - 
"\373\373\373\371\371\371\367\367\367\364\364\364\360\360\360\352\352" - "\352\343\343\343\335\335\335\322\322\322\310\310\310\272\272\272\224" - "\224\224DFG\35*5%AZ\35>\\\37\77[\35\77[\35>[\36\77[\206\36>[\4\35>[\35" - "\77[\36>[\35>\\\203\36>[\3\35>\\\35\77[\36>[\202\35>[\3\36>[\36\77\\" - "\36\77[\202\36>[\22\36>Z\35=Y\35;X\34;U\33:T\33""9T\34:U\34:V\34=X\36" - "=Z\35>[\36\77[\35>[\36>\\\35\77\\\36\77[\35>[\36\77[\202\36>[\5\35>\\" - "\36\77\\\36>[\36>\\\36\77[\202\36>[\6\36>\\\36>[\36\77\\\36>\\\36\77" - "\\\36>[\202\35>[\21\35\77\\\36>\\\35>[\36=Z\35[\35>\\\35\77[\36>[\35>\\\202\36>[\5\35>[\36\77" - "[\36>[\35>\\\36\77[\202\36>[\1\35>[\203\36>[\10\36\77\\\36>[\35\77\\" - "\36>[\35>[\36\77[\35\77[\36\77[\202\36>[\1\36\77\\\202\36>[\15\35\77" - "[\36\77[\35>\\\35\77[\36>\\\35>[\35\77]\36\77]\36@]\36\77]\36@]\35\77" - "\\\36@]\202\36\77]\37\35\77]\36\77]\36@]\36@\\\35\77]\36\77]\36@\\\36" - "\77[\35>[\35Z\36>\\\36\77\\\36@]\36\77]\36@]\36\77]\36@]\36" - "\77]\202\36@]\1\36\77]\202\36@\\\20\36@]\36\77\\\35>[\33\\\36@]\202" - "\36\77]\2\35\77]\36\77\\\202\36\77]\203\36@]\21\36\77]\36\77\\\35\77" - "\\\35=Z\34Y" - "\35[\36\77\\\35\77]\36\77]\202\36@]\7" - "\36@\\\35@]\36@]\36\77]\36@]\36\77]\36@]\202\36\77]\1\36@\\\204\36\77" - "]\21$C^A9\31\253}\0\347\251\0\372\271\6\362\264\12w]\26\\Q5\252|\0\336" - "\243\0\363\262\0\372\270\2\370\267\6\212h\10\215\214\207\354\354\354" - "\373\373\373\202\375\375\375\202\374\374\374\25\372\372\372\371\371\371" - "\367\367\367\364\364\364\361\361\361\356\356\356\351\351\351\344\344" - "\344\335\335\335\325\325\325\316\316\316\304\304\304\262\262\261\203" - "\203\204246\40.;&D^\37\77]!B_\37@^\36@]\202\36\77]\4\36\77\\\36@]\36" - "\77]\36\77\\\202\36\77]\15\36\77\\\36\77]\35@\\\36\77\\\36\77]\36\77" - "\\\36@]\36\77\\\35\77\\\35\77]\36@]\36\77]\36\77\\\202\36\77]\1\36@]" - "\202\36\77]\4\36\77[\36>[\34>[\35=Y\202\35>Z\5\35>[\36>\\\36\77\\\36" - "\77]\36\77\\\206\36\77]\2\35\77\\\36\77]\202\36\77\\\4\36@]\35\77\\\36" - 
"\77]\36@\\\202\36\77]\202\36@]\13\36\77]\36\77\\\36@\\\35\77]\36\77]" - "\36@]\36\77]\35\77]\36@\\\36\77\\\35>[\202\35=Z\1\35=Y\202\35>[\6\35" - "\77[\35\77]\36@]\36\77\\\35\77\\\36\77]\203\36\77\\\5\36@]\36\77\\\35" - "@]\36\77]\35@\\\203\36\77]\203\36@]\1\36\77\\\202\36\77]\1\36@]\202\36" - "\77]\6\35\77\\\36\77]\36\77\\\36\77]\35@]\36\77\\\204\36\77]\10\36\77" - "\\\36\77]\37A^\36@^\36A^\37@^\36@^\37@^\205\36@^\11\36A^\36@^\37A^\36" - "@_\37@^\36@^\37@_\36A^\37@^\211\36@^\202\37@^\2\36@_\37@_\202\36@^\202" - "\36@_\1\36A^\202\36@^\2\37@^\36A^\202\36@^\203\36A^\1\36@_\203\36@^\2" - "\37A^\36@_\203\36@^\1\37@^\203\36@^\5\36@_\36@^\37@^\36@^\36A^\202\36" - "@_\1\36A^\203\37@_\2\36@^\37@^\207\36@^\203\37@^\4\36A^\36@_\36@^\36" - "@_\203\36@^\1\37@^\203\36@^\12\37A^\37@^\36@^\36A^\37@^\36@_\36A^\37" - "@_\36@^\36A^\206\36@^\21\36A^\37A^3=<\213g\3\321\231\0\370\266\1\372" - "\270\6\361\262\6\322\232\1\362\261\0\361\261\0\371\266\1\372\270\3\340" - "\246\7l[*\257\257\257\353\353\353\204\363\363\363\24\362\362\362\360" - "\360\360\355\355\355\352\352\352\346\346\346\342\342\342\336\336\336" - "\331\331\331\324\324\324\314\314\314\301\301\301\251\251\251yyz367\35" - "\"'0\77M)Gb\40A_\40C`$C`\202\36@^\2\37@_\36@^\202\36A^\2\36@^\36A^\202" - "\36@^\202\36@_\204\36@^\1\36@_\203\36@^\2\37@^\36A^\202\36@^\5\36@_\36" - "@^\36A_\36@^\36A^\203\36@^\5\37A^\36@^\36@_\36@^\36@_\204\36@^\4\37@" - "^\36@^\36A^\36@_\202\37@_\202\36@^\1\36A^\203\36@^\202\36A^\7\36@^\37" - "@^\36A^\36@^\37@_\36@^\37@_\202\36@^\11\37@^\36@^\36A^\36@^\36A^\36@" - "^\36@_\36@^\36A^\203\36@^\1\36@_\202\37@^\1\36A^\204\36@^\5\36A^\36@" - "^\36@_\36@^\36@_\202\36@^\1\36A^\204\36@^\6\36@_\36@^\37A^\36@^\37@^" - "\36A^\207\36@^\3\36A^\37@^\36A_\202\37A`\4\37B`\37A_\36A_\37A`\203\37" - "A_\202\36A_\4\36A`\37A_\36B`\37B_\202\36A_\3\37A`\36A`\36A_\202\37A_" - "\4\37A`\36A_\37A`\37B`\205\36A`\202\36A_\1\37A_\204\36A_\1\36A`\202\37" - "A_\1\36A`\202\37A_\1\37A`\202\36A`\1\37A_\202\36A_\202\36A`\24\37A`\36" - 
"A`\37A`\36A`\37A_\36B`\37A_\36A`\37A_\37A`\36A`\37A_\36A_\36A`\37A`\36" - "A_\37A`\36A_\36A`\36B`\202\37A`\2\36A_\37A`\202\37A_\3\37A`\36A`\37A" - "`\202\36A`\1\36A_\202\36A`\4\36A_\37A`\36B`\37A_\202\37A`\2\36A_\37A" - "_\203\37A`\6\36A_\37A_\36A_\37A_\37A`\36A_\202\36A`\12\36A_\37A`\37A" - "_\36A_\36A`\37A`(9CqU\5\302\216\0\357\257\0\202\372\267\2\11\372\267" - "\1\371\266\0\370\265\0\372\267\1\372\270\5\305\222\10_ZN\303\303\303" - "\344\344\344\204\347\347\347\24\346\346\346\344\344\344\341\341\341\336" - "\336\336\331\331\331\323\323\323\316\316\316\312\312\312\273\273\273" - "\242\242\242\201\201\202CEG&-1-\77@TUUnoo\211\211\211\203" - "\221\221\221\14\220\220\220\215\215\215pppSTT;=@'/6$3\77)=M'AX!C`\37" - "Bc\37Cc\203\37Cb\1\37Bb\202\37Cb\4\37Bc\37Cb\37Cc\36Cb\203\37Cc\6\37" - "Cb\37Cc\37Cb\37Cc\37Cb\37Bc\202\37Cb\2\37Cc\37Cb\204\37Cc\202\37Cb\1" - "\37Cc\202\37Bb\202\37Cb\1\37Bc\202\37Cb\204\37Cc\3\37Bc\36Cc\37Cc\202" - "\37Cb\13\37Cc\37Bc\37Cb\36Cc\37Cb\37Cc\37Bb\37Cb\37Bc\37Cb\37Cc\207\37" - "Cb\203\37Cc\1\37Bc\203\37Cb\202\37Cc\2\37Bb\37Cc\202\37Cb\1\37Cc\203" - "\37Cb\1\37Cc\202\37Cb\1\37Bc\203\37Cb\16\37Cc\37Cb\36Cc\37Bc\37Cb\37" - "Cc\37Cb\37Bc\36Cb\37Cb\37Bc\37Cc\37Cb\37Bc\202\37Cb\3\37Bc\37Cb\37Bb" - "\202\37Cc\13\37Cb\37Cc\37Cb\37Cc\37Bc\37Cb\36Cc\36Cb\37Cb\37Cd\40Cd\202" - "\37Dd\3\40Cd\37Cd\40Cd\202\37Dd\202\37Cd\202\37Dd\2\37Cd\37Dd\202\37" - "Cd\207\37Dd\1\37Cd\202\37Dd\7\37Cd\37Dd\37Dc\40Dd\37Cd\40Cd\37Dd\202" - "\37Cd\203\37Dd\1\40Dd\205\37Dd\1\37Cd\204\37Dd\14\37Dc\40Cd\37Dd\37C" - "d\37Dd\37Cd\37Dd\37Cd\37Dd\40Dc\37Dd\37Cd\212\37Dd\1\40Dd\202\37Dd\4" - "\40Cd\37Cd\40Dd\40Cd\203\37Dd\202\37Cd\202\37Dd\1\37Cd\202\37Dd\202\37" - "Cd\2\40Dd\37Cd\202\37Dd\1\40Cd\202\37Dd\1\37Cd\202\37Dd\2\40Dc\37Cd\205" - "\37Dd#\37Cd\37Dd\37Cd\37Dd\40Dd+DW]K\17\251|\1\326\234\0\360\260\0\370" - "\266\0\372\267\0\372\267\1\372\270\5\303\220\10).11FY+BX';K#2\77$0;%" - "08%/8%08%/9$0;\"2@&:L'AW%D_\"Dd\40Ed\40Dd\37Dd\37Cd\203\37Dd\1\40Dd\202" - 
"\37Dd\5\37Cd\37Dd\40Cd\37Dd\37Cd\202\37Dd\2\37Cd\40Dd\202\37Dd\1\40D" - "d\203\37Dd\1\37Cd\203\37Dd\7\37Cc\37Dd\37Dc\37Dd\40Dc\37Cd\40Dd\211\37" - "Dd\15\40Cd\37Cd\37Dd\37Dc\37Cd\37Dd\37Cd\40Cd\37Dc\37Dd\40Dd\37Cd\37" - "Dd\202\37Cd\4\37Dd\40Cc\37Dd\37Cd\203\37Dd\2\37Cd\40Dd\202\37Cd\203\37" - "Dd\1\40Cd\204\37Dd\1\40Cd\204\37Dd\2\37Cd\37Dc\202\37Dd\20\37Cd\40Dd" - "\37Dd\37Cd\37Dd\37Cd\37Dd\37Cd\37Dd\37Cd\37Dd\40Cd\37Cd\37Dd\40Cd\40" - "Dd\205\37Dd\202\37Cd\3\37Dd\40Dd\37Dd\202\37Cd\203\37Dd\204\37Cd\202" - "\40Ef\14\37Ef\40Ee\37Ee\37De\40Ee\37De\37Ee\37De\40Df\37Ee\40De\40Ee" - "\202\40De\20\37Ef\40Ee\37Df\37Ee\40De\37Df\40De\40Ee\40Ef\37Ef\37De\40" - "Ee\40Df\37Ef\40Ee\37Df\202\40Df\5\37Ef\37Df\40Ee\40Ef\40Ee\202\37Ee\3" - "\40Ee\37Ee\37De\202\40De\202\40Ee\11\37De\40De\37Df\37De\40Ee\40De\40" - "Df\40Ee\37Ef\202\37Ee\2\37De\40Ee\202\40Df\1\37Ef\202\37Df\6\40De\37" - "Ee\37Df\40Dfdw\214\337\342\346\202\377\377\377\5\352\354\356u\206\227" - "\37EeQh\200\337\342\346\202\377\377\377\3\337\342\346Qh\200\40Ef\202" - "\37Ee\4\37De\40De\37De\37Ef\202\37Df\12\324\330\335\224\240\255\37Df" - "\37Ee\37De\40Ef\40De\37De\37Df\40Ee\202\37Ee\6\37Dev\206\227\352\354" - "\356\40De\37De\37Ef\202\37Ee\14\37De%Hf4@BiO\6\261\202\1\337\243\0\363" - "\262\0\371\266\0\372\270\3\370\267\7iR\15""7M_\202!Ff\20!Egey\214\377" - "\377\377\365\366\367%Hg%Gg%Hh#Ff!Ef\40Ef\37Ef\37Df\40Ee\37Df\37EeQh\200" - "\204\377\377\377\31\337\342\346;Ws\242\254\267\274\303\312\37De\40De" - "\37Ee\37De\40Ef\37Ee\37De\40Ee\37Ee\40Ef\37Ef\37De\37Ee\40Ef\37Ee\40" - "Efu\206\227\242\254\267\37Ee\37Df\37Ee\203\40De\7Qh\200\310\315\324\37" - "Ee\324\330\335\224\240\255\40Ef\40De\203\37Ee\13\40Ef\37Df\37De\37Ef" - "\40Df\37Ef\40Ef\40Ee\40DeQh\200\337\342\346\202\377\377\377\5\242\254" - "\267\40Df\37De\40Ee\37Ee\205\377\377\377\1\224\240\255\203\37Ee\2\40" - "Df\40De\203\40Ee\3\40Df\40De\40Ef\202\37Ef\23\40Ee\242\254\267\274\303" - "\312\37De\40Ee\40Ef\40De\37Ef\37Eev\206\227\242\254\267\37De\37Ee\40" - 
"Df\37Ef\37Ee\40De\37Ef\37Df\202\37Ee\3\40Ef\40De\40Ee\202\37Ef\4\40E" - "e\37Df\37Ef\40Ef\203\37Ee\27\40Df\40Ee\40Df\40De\37Ee\40Ef\37De\40De" - "\40Eg\40Fg\40Ff\40Fg\37Eg\37Ff\40Eg\37Eg\40Fg\37Ef\40Ef\40Fg\37Eg\37" - "Fg\37Eg\202\37Fg\202\37Eg\1\40Fg\202\37Eg\203\40Fg\202\40Eg\2\37Eg\37" - "Ef\202\40Fg\202\40Eg\3\40Ef\40Fg\40Eg\202\40Fg\204\40Eg\7\40Ef\37Eg\40" - "Eg\37Eg\37Fg\40Eg\40Ff\202\40Eg\3\40Fg\40Eg\40Ff\202\40Fg\7\37Eg\40F" - "g\37Fg\40Fg\37Fg\40Fg\37Eg\202\40Fg\1\37Fg\202\37Eg\22\40Eg&\251}\1\310\223\1}c\25""5JX$Km!Il\40Im\40Il!Im\242\255\272\274\304" - "\315Qk\205\377\377\377v\210\233!Il\324\331\336\310\316\326v\210\233\352" - "\354\357\40Im!Il\202\40Il\3Qk\206\377\377\377!Il\202!Im\202!Il\11\242" - "\255\272\274\304\315\242\255\272\352\354\357<[yez\220\377\377\377v\210" - "\233!Il\202\352\354\357\3\242\255\272!Im!Il\202\40Im\1\337\343\347\202" - "\242\255\272\30\274\304\314dz\220\377\377\377ez\220<[y\352\354\357\242" - "\255\272v\210\233\352\354\357!Im\324\331\336\224\241\260\274\304\314" - "\324\331\336\40Ilv\210\234\377\377\377dy\220\324\331\336\224\241\260" - "!Im\224\241\260\324\331\336\40Il\202!Il\6ez\221\352\354\357\224\241\260" - "!Im!Hm!Il\202!Im)!Il\377\377\377Rk\206!Il!Hl!Il\40Hl\242\255\272\274" - "\304\314\40Il\224\241\260\324\331\336!Hm\224\241\260\352\354\357v\210" - "\233\365\366\367<[zdz\220\377\377\377v\210\233\242\255\272\274\304\314" - "\310\316\326\310\316\325\40Im\242\255\272\377\377\377\40Hl\242\255\272" - "\274\304\314Rk\206\377\377\377v\210\233!Il\324\331\336\310\316\326\242" - "\255\272\274\304\314\40Il!Im\202\40Im\4\206\225\246\377\377\377!Il\40" - "Im\202\40Il\202!Im\1!Il\205!Im\5\40Im\40Hl!Il!Hl!Jm\203!Jn\1!Jm\207!" 
- "Jn\3\40Jn!Jm!Jn\202!In\202!Jn\5!Im!Jn!In\40Jn!In\205!Jn\1!Im\203!Jn\3" - "!In\40Jn!Jm\203!Jn\1!In\202!Jn\202\40Jn\2!Jn!In\205!Jn\3!Jm!Jn!Jm\202" - "!Jn\1!In\203!Jn\2!Im!Jm\202!Jn\1!Im\202!Jm\5!Jn\40Jn!Jne{\221\324\331" - "\336\202\377\377\377\5\352\354\357\224\242\261!Jne{\222\337\343\347\202" - "\377\377\377\2\352\354\357v\210\234\203!Jn\17v\210\234\365\366\367\352" - "\354\357\377\377\377\274\304\315!In\324\331\336\224\242\261Rl\206\352" - "\354\357\377\377\377\324\331\336\377\377\377!In\242\256\272\202\377\377" - "\377\23\337\343\347\242\256\272v\210\234\352\354\357!Jn\377\377\377R" - "k\206!Jn\274\304\315\242\256\272!Jm!Im!Jm!Jn(Ic.>@5DE0Kb&Ln\203!Jn\6" - "!Jm!Jn\242\256\272\274\304\315!Jn\242\255\272\202\377\377\377\4\337\343" - "\347Ql\206v\210\234\352\354\357\202!In\6!Jn!InRk\206\377\377\377!In!" - "Jn\202!In\17\40Jn\242\256\272\274\304\315e{\222\365\366\367\377\377\377" - "\337\343\347\352\354\357\206\225\246\40Jn\242\256\272\377\377\377Rl\206" - "!Jm\324\331\336\202\377\377\377\14\337\343\347Rl\206\224\242\261\377" - "\377\377\242\255\272\337\343\347\377\377\377\352\354\357\324\331\336" - "\257\271\304Ql\206\377\377\377\202\324\331\336\3\224\242\261Rl\206\337" - "\343\347\202\377\377\377\13\242\256\272!Jn\324\331\336\224\242\261!I" - "n\224\242\261\324\331\336!In!Im!In\337\343\347\204\377\377\377\1Rl\206" - "\203!Jn\205\377\377\377\202\242\256\272\20\274\304\315!Jn\224\242\261" - "\324\331\336!Jn\224\241\261\352\354\357!In\324\331\336\377\377\377\337" - "\343\347\365\366\367v\210\234\242\256\272\274\304\315\224\242\261\202" - "\377\377\377\5\310\316\326\377\377\377<\\{\224\242\260\377\377\377\202" - "\242\256\272\202\377\377\377\7\337\343\347Rl\206\242\255\272\274\304" - "\315!Jnv\210\234\365\366\367\202\377\377\377\5\242\255\272\40Jn!In\40" - "Jn!Im\202!Jn\1\40Jn\205!Jn\1!Jm\202!Jn\1!In\202!Ko\3!Jo!Kp!Jo\202!Kp" - "\202!Jo\3!Ko!Kp!Jo\202!Ko\4!Jp\"Jo!Ko!Jo\203!Ko\11\"Kp!Jp!Ko!Jo!Ko\"" - 
"Jo!Kp!Jo\"Ko\202!Ko\1!Jo\202!Ko\3\"Jo!Ko!Jo\202!Ko\10!Kp\"Jp!Kp!Jo!J" - "p\"Jo!Ko!Kp\204!Ko\1!Kp\203!Ko\"\"Ko!Jo!Ko!Jp!Kp!Ko!Jo!Kp\"Jo!Jo!Kp!" - "Jp!Ko!Jo!Jp\"Ko!Kp!Jo!Ko!Jo\"Jo!Ko!Jo!Ko!Kp!Ko\"Jo!Kp!Jo!Kov\211\235" - "\352\354\357\"Kp\"Ko\203!Jo\2!Kp!Jo\202!Ko\10\"Jo!KpRl\210\377\377\377" - "e{\222=]{\352\354\357\242\256\273\202!Jo\4!Ko!Jo\"Ko!Jo\205!Ko\6!Kp!" - "Ko#Lo#Ko\"Jp\"Kp\202!Ko\3\"Ko!Jo\"Ko\205!Ko\3!Jo!Jp\"Jp\202!Jo\3!Kp!" - "Jo!Jp\202!Jo\202!Ko\5!Jo!Kp!Ko!Jo!Ko\202!Jo\202!Kp\202!Jo\4!Kp\"Jo\274" - "\304\315\324\331\337\205!Jo\2!Ko!Kp\202!Ko\1!Jo\202!Ko\2!Jo!Kp\203!K" - "o\202!Jo\6\"Ko!Jo\"Ko!Jp!Ko!Jo\202!Ko\13!Kp!Ko!Jo!Ko!Jp!Kp\"Ko!Ko\"K" - "o!Kp!Jo\205!Ko\2\"Jp!Ko\202!Jo\202!Ko\202!Jo\6!Ko!Jo\"Kp!Jp!Kp!Jp\202" - "!Ko\3!Jo!Jp!Jo\203!Ko\1!Jo\203!Ko\2!Jp!Jo\203!Ko\3!Kp!Ko!Jo\202!Ko\2" - "!Jo!Ko\202!Jo\202!Ko\2!Kp\"Ko\203!Ko\1!Jp\205!Ko\5!Kp!Jo!Jp!Ko!Jo\202" - "!Ko\20!Lq!Kp\"Kq!Kq!Kp\"Lq!Kq!Lq\"Lp!Kp\"Kq!Lq!Kq!Lq\"Lp\"Lq\202!Lq\10" - "\"Lq!Lp!Kp\"Lp!Lp!Kq\"Lq!Lq\202!Kq\11\"Kq!Kq!Lq\"Lp!Kq!Lp!Kq!Lq\"Lp\202" - "\"Kq\202!Lq\6\"Kp!Lq\"Lp!Kq\"Lp\"Kp\202!Kp\12!Lp\"Kp\"Kq\"Kp!Kq!Kp!K" - "q!Lq!Kq!Lq\202!Kq\15\"Lp\"Kp!Kq\"Lp!Kp!Lp!Kp\"Kp\"Lq!Kq!Lq!Kp!Lp\202" - "!Lq\2!Kp\"Lp\202!Kq\13!Kp!Lp\"Kp!Kp!Lp\"Lqv\211\236\352\354\357!Lp!K" - "q\"Lq\202!Lq\3!Lp\"Kq\"Lq\202!Lq\3!Kq!Lq\274\304\315\202\377\377\377" - "\6\324\331\337=]}\"Kq!Lq!Kq\"Kq\202!Lp\14\"Lp!Kq!Lq\"Lp!Lp!Lq!Kq!Lq!" - "Lp\"Lp!Kp\"Kq\202!Kp\202\"Kq\1!Lp\202!Lq\202!Lp\4\"Lq!Lp!Kp!Lp\202!K" - "q\11\"Lq\"Kq!Lq!Kp\"Kq!Kq\"Lp!Lq!Kq\203\"Lp\11\"Kq!Lq\"Kq\"Lq\"Lp!Kq" - "\324\331\337\377\377\377e|\224\203!Kq\20\"Lp!Kq!Lp\"Lp!Kq\"Kp!Kq!Lq\"" - "Lp!Kp!Lp\"Kq\"Lq!Kp!Kq\"Kq\203\"Lp\23!Kp!Kq\"Lq!Kq!Lq!Kp\"Kp!Kq!Lp!K" - "q!Lq\"Lq\"Kp!Lp\"Kp!Lp\"Lq!Lq!Lp\203!Kq\6!Kp\"Lp\"Lq!Kq!Lp\"Lp\202!L" - "p\202\"Lp\10!Kp!Kq!Kp!Kq!Lp!Kq!Lp!Kp\202!Kq\13!Lp\"Kq!Kp!Lp\"Lq!Lp!K" - "q\"Kp!Lq!Kq!Kp\202!Lq\202!Kp\1\"Lp\202!Kq\6!Lq!Kq!Lq\"Lq!Kq!Lp\202!L" - "q\203!Kq\3!Kp!Lp\"Kq\202!Kp\1\"Lq\202!Kq\4!Lq\"Kq!Lr!Mr\202!Lr\2!Mr!" 
- "Lr\203\"Lr\203\"Mr\202!Lr\204\"Lr\2!Lr\"Lr\202!Lr\202\"Mr\2!Lr\"Lr\202" - "!Lr\202\"Lr\4!Lr\"Lr!Lr\"Lr\202!Lr\10\"Lr!Lr\"Mr\"Lr!Lr\"Mr\"Lr!Mr\202" - "\"Lr\4!Lr\"Mr!Lr\"Mr\203\"Lr\4!Mq\"Lr\"Mr!Lr\202\"Lr\1!Lr\204\"Lr\10" - "!Lr\"Mr!Mr\"Lr\"Mr\"Lr!Mr\"Mr\203!Lr\6\"Lr!Lr\"Lr\"Lq!Lr\"Mr\204!Lr\202" - "\"Lr\7!Mr!Lr\"Mr!Mr!Mq\"Mr\"Lr\203!Lr\6!Mr!Lr\"Mr\"Lr!Mr!Lr\202\"Lr\14" - "!Lr!Mr\"Lq!Lr\"Lr!Mr!Lr\"Mr\"Lr!Mr\"Lr!Lr\204\"Lr\2\"Mr!Mr\202!Lr\2!" - "Mr\"Mr\202\"Lr\202!Mr\1!Lr\203\"Lr\5!Lr\"Lr!Lr\"Lr!Lr\202\"Lr\202\"M" - "r\1\"Lr\202!Mr\202\"Mr\3\"Lr!Lr!Mr\202!Lr\2\"Lr!Lr\203\"Lr\11!Lr\"Mr" - "\"Lr\"Mr!Mr\"Lq!Lr\"Mr!Mr\202!Lr\3\"Lr!Lr\"Lr\202!Mr\202\"Lr\11!Lr\"" - "Lr!Lr\"Lr!Lr\"Lr!Mr!Lr\"Mr\202!Lr\2\"Lr!Mr\204\"Lr\4!Lr\"Lr\"Mr\"Lr\202" - "\"Mr\7!Lr\"Lr!Lr!Mr!Lr\"Lr\"Mr\202\"Lr\202!Lr\5\"Lr!Mr!Lr\"Mr!Lr\203" - "\"Lr\6!Lr!Mr\"Mr!Mr!Lr\"Lr\203!Lr\4\"Mr\"Lr!Lr!Mr\204\"Lr\1\"Mr\203\"" - "Lr\4!Mr!Lr\"Lr\"Mr\202\"Lr\5\"Mr\"Lr!Lr\"Lr\"Mr\202\"Lr\3\"Mr!Lr\"Mr" - "\203\"Lr\3!Lr\"Lr!Lr", -}; - - diff --git a/plugins/GSdx_legacy/GSLocalMemory.cpp b/plugins/GSdx_legacy/GSLocalMemory.cpp deleted file mode 100644 index 537a3f088f..0000000000 --- a/plugins/GSdx_legacy/GSLocalMemory.cpp +++ /dev/null @@ -1,2126 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - * Special Notes: - * - * Based on Page.c from GSSoft - * Copyright (C) 2002-2004 GSsoft Team - * - */ - -#include "stdafx.h" -#include "GSLocalMemory.h" - -#define ASSERT_BLOCK(r, w, h) \ - ASSERT((r).width() >= w && (r).height() >= h && !((r).left & (w - 1)) && !((r).top & (h - 1)) && !((r).right & (w - 1)) && !((r).bottom & (h - 1))); \ - -#define FOREACH_BLOCK_START(r, w, h, bpp) \ - ASSERT_BLOCK(r, w, h); \ - GSVector4i _r = r >> 3; \ - uint8* _dst = dst - _r.left * bpp; \ - int _offset = dstpitch * h; \ - for(int y = _r.top; y < _r.bottom; y += h >> 3, _dst += _offset) \ - { \ - uint32 _base = off->block.row[y]; \ - for(int x = _r.left; x < _r.right; x += w >> 3) \ - { \ - const uint8* src = BlockPtr(_base + off->block.col[x]); \ - uint8* dst = &_dst[x * bpp]; \ - -#define FOREACH_BLOCK_END }} - -// - -uint32 GSLocalMemory::pageOffset32[32][32][64]; -uint32 GSLocalMemory::pageOffset32Z[32][32][64]; -uint32 GSLocalMemory::pageOffset16[32][64][64]; -uint32 GSLocalMemory::pageOffset16S[32][64][64]; -uint32 GSLocalMemory::pageOffset16Z[32][64][64]; -uint32 GSLocalMemory::pageOffset16SZ[32][64][64]; -uint32 GSLocalMemory::pageOffset8[32][64][128]; -uint32 GSLocalMemory::pageOffset4[32][128][128]; - -int GSLocalMemory::rowOffset32[4096]; -int GSLocalMemory::rowOffset32Z[4096]; -int GSLocalMemory::rowOffset16[4096]; -int GSLocalMemory::rowOffset16S[4096]; -int GSLocalMemory::rowOffset16Z[4096]; -int GSLocalMemory::rowOffset16SZ[4096]; -int GSLocalMemory::rowOffset8[2][4096]; -int GSLocalMemory::rowOffset4[2][4096]; - -short GSLocalMemory::blockOffset32[256]; -short GSLocalMemory::blockOffset32Z[256]; -short GSLocalMemory::blockOffset16[256]; -short GSLocalMemory::blockOffset16S[256]; -short GSLocalMemory::blockOffset16Z[256]; -short GSLocalMemory::blockOffset16SZ[256]; -short 
GSLocalMemory::blockOffset8[256]; -short GSLocalMemory::blockOffset4[256]; - -// - -GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; - -// - -GSLocalMemory::GSLocalMemory() - : m_clut(this) -{ - m_vm8 = (uint8*)vmalloc(m_vmsize * 2, false); - m_vm16 = (uint16*)m_vm8; - m_vm32 = (uint32*)m_vm8; - - memset(m_vm8, 0, m_vmsize); - - for(int bp = 0; bp < 32; bp++) - { - for(int y = 0; y < 32; y++) for(int x = 0; x < 64; x++) - { - pageOffset32[bp][y][x] = PixelAddressOrg32(x, y, bp, 0); - pageOffset32Z[bp][y][x] = PixelAddressOrg32Z(x, y, bp, 0); - } - - for(int y = 0; y < 64; y++) for(int x = 0; x < 64; x++) - { - pageOffset16[bp][y][x] = PixelAddressOrg16(x, y, bp, 0); - pageOffset16S[bp][y][x] = PixelAddressOrg16S(x, y, bp, 0); - pageOffset16Z[bp][y][x] = PixelAddressOrg16Z(x, y, bp, 0); - pageOffset16SZ[bp][y][x] = PixelAddressOrg16SZ(x, y, bp, 0); - } - - for(int y = 0; y < 64; y++) for(int x = 0; x < 128; x++) - { - pageOffset8[bp][y][x] = PixelAddressOrg8(x, y, bp, 0); - } - - for(int y = 0; y < 128; y++) for(int x = 0; x < 128; x++) - { - pageOffset4[bp][y][x] = PixelAddressOrg4(x, y, bp, 0); - } - } - - for(size_t x = 0; x < countof(rowOffset32); x++) - { - rowOffset32[x] = (int)PixelAddress32(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset32Z); x++) - { - rowOffset32Z[x] = (int)PixelAddress32Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset16); x++) - { - rowOffset16[x] = (int)PixelAddress16(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset16S); x++) - { - rowOffset16S[x] = (int)PixelAddress16S(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset16Z); x++) - { - rowOffset16Z[x] = (int)PixelAddress16Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset16SZ); x++) - { - rowOffset16SZ[x] = 
(int)PixelAddress16SZ(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset8[0]); x++) - { - rowOffset8[0][x] = (int)PixelAddress8(x & 0x7ff, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32); - rowOffset8[1][x] = (int)PixelAddress8(x & 0x7ff, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32); - } - - for(size_t x = 0; x < countof(rowOffset4[0]); x++) - { - rowOffset4[0][x] = (int)PixelAddress4(x & 0x7ff, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32); - rowOffset4[1][x] = (int)PixelAddress4(x & 0x7ff, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32); - } - - for(size_t x = 0; x < countof(blockOffset32); x++) - { - blockOffset32[x] = (short)((int)BlockNumber32(x << 3, 0, 0, 32) - (int)BlockNumber32(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset32Z); x++) - { - blockOffset32Z[x] = (short)((int)BlockNumber32Z(x << 3, 0, 0, 32) - (int)BlockNumber32Z(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset16); x++) - { - blockOffset16[x] = (short)((int)BlockNumber16(x << 3, 0, 0, 32) - (int)BlockNumber16(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset16S); x++) - { - blockOffset16S[x] = (short)((int)BlockNumber16S(x << 3, 0, 0, 32) - (int)BlockNumber16S(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset16Z); x++) - { - blockOffset16Z[x] = (short)((int)BlockNumber16Z(x << 3, 0, 0, 32) - (int)BlockNumber16Z(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset16SZ); x++) - { - blockOffset16SZ[x] = (short)((int)BlockNumber16SZ(x << 3, 0, 0, 32) - (int)BlockNumber16SZ(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset8); x++) - { - blockOffset8[x] = (short)((int)BlockNumber8(x << 3, 0, 0, 32) - (int)BlockNumber8(0, 0, 0, 32)); - } - - for(size_t x = 0; x < countof(blockOffset4); x++) - { - blockOffset4[x] = (short)((int)BlockNumber4(x << 3, 0, 0, 32) - (int)BlockNumber4(0, 0, 0, 32)); - } - - for(size_t i = 0; i < countof(m_psm); i++) - { - m_psm[i].pa = 
&GSLocalMemory::PixelAddress32; - m_psm[i].bn = &GSLocalMemory::BlockNumber32; - m_psm[i].rp = &GSLocalMemory::ReadPixel32; - m_psm[i].rpa = &GSLocalMemory::ReadPixel32; - m_psm[i].wp = &GSLocalMemory::WritePixel32; - m_psm[i].wpa = &GSLocalMemory::WritePixel32; - m_psm[i].rt = &GSLocalMemory::ReadTexel32; - m_psm[i].rta = &GSLocalMemory::ReadTexel32; - m_psm[i].wfa = &GSLocalMemory::WritePixel32; - m_psm[i].wi = &GSLocalMemory::WriteImage; - m_psm[i].ri = &GSLocalMemory::ReadImageX; // TODO - m_psm[i].rtx = &GSLocalMemory::ReadTexture32; - m_psm[i].rtxP = &GSLocalMemory::ReadTexture32; - m_psm[i].rtxb = &GSLocalMemory::ReadTextureBlock32; - m_psm[i].rtxbP = &GSLocalMemory::ReadTextureBlock32; - m_psm[i].bpp = m_psm[i].trbpp = 32; - m_psm[i].pal = 0; - m_psm[i].bs = GSVector2i(8, 8); - m_psm[i].pgs = GSVector2i(64, 32); - for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32; - m_psm[i].blockOffset = blockOffset32; - m_psm[i].msk = 0xff; - } - - m_psm[PSM_PSMCT16].pa = &GSLocalMemory::PixelAddress16; - m_psm[PSM_PSMCT16S].pa = &GSLocalMemory::PixelAddress16S; - m_psm[PSM_PSMT8].pa = &GSLocalMemory::PixelAddress8; - m_psm[PSM_PSMT4].pa = &GSLocalMemory::PixelAddress4; - m_psm[PSM_PSMZ32].pa = &GSLocalMemory::PixelAddress32Z; - m_psm[PSM_PSMZ24].pa = &GSLocalMemory::PixelAddress32Z; - m_psm[PSM_PSMZ16].pa = &GSLocalMemory::PixelAddress16Z; - m_psm[PSM_PSMZ16S].pa = &GSLocalMemory::PixelAddress16SZ; - - m_psm[PSM_PSMCT16].bn = &GSLocalMemory::BlockNumber16; - m_psm[PSM_PSMCT16S].bn = &GSLocalMemory::BlockNumber16S; - m_psm[PSM_PSMT8].bn = &GSLocalMemory::BlockNumber8; - m_psm[PSM_PSMT4].bn = &GSLocalMemory::BlockNumber4; - m_psm[PSM_PSMZ32].bn = &GSLocalMemory::BlockNumber32Z; - m_psm[PSM_PSMZ24].bn = &GSLocalMemory::BlockNumber32Z; - m_psm[PSM_PSMZ16].bn = &GSLocalMemory::BlockNumber16Z; - m_psm[PSM_PSMZ16S].bn = &GSLocalMemory::BlockNumber16SZ; - - m_psm[PSM_PSMCT24].rp = &GSLocalMemory::ReadPixel24; - m_psm[PSM_PSMCT16].rp = &GSLocalMemory::ReadPixel16; - 
m_psm[PSM_PSMCT16S].rp = &GSLocalMemory::ReadPixel16S; - m_psm[PSM_PSMT8].rp = &GSLocalMemory::ReadPixel8; - m_psm[PSM_PSMT4].rp = &GSLocalMemory::ReadPixel4; - m_psm[PSM_PSMT8H].rp = &GSLocalMemory::ReadPixel8H; - m_psm[PSM_PSMT4HL].rp = &GSLocalMemory::ReadPixel4HL; - m_psm[PSM_PSMT4HH].rp = &GSLocalMemory::ReadPixel4HH; - m_psm[PSM_PSMZ32].rp = &GSLocalMemory::ReadPixel32Z; - m_psm[PSM_PSMZ24].rp = &GSLocalMemory::ReadPixel24Z; - m_psm[PSM_PSMZ16].rp = &GSLocalMemory::ReadPixel16Z; - m_psm[PSM_PSMZ16S].rp = &GSLocalMemory::ReadPixel16SZ; - - m_psm[PSM_PSMCT24].rpa = &GSLocalMemory::ReadPixel24; - m_psm[PSM_PSMCT16].rpa = &GSLocalMemory::ReadPixel16; - m_psm[PSM_PSMCT16S].rpa = &GSLocalMemory::ReadPixel16; - m_psm[PSM_PSMT8].rpa = &GSLocalMemory::ReadPixel8; - m_psm[PSM_PSMT4].rpa = &GSLocalMemory::ReadPixel4; - m_psm[PSM_PSMT8H].rpa = &GSLocalMemory::ReadPixel8H; - m_psm[PSM_PSMT4HL].rpa = &GSLocalMemory::ReadPixel4HL; - m_psm[PSM_PSMT4HH].rpa = &GSLocalMemory::ReadPixel4HH; - m_psm[PSM_PSMZ32].rpa = &GSLocalMemory::ReadPixel32; - m_psm[PSM_PSMZ24].rpa = &GSLocalMemory::ReadPixel24; - m_psm[PSM_PSMZ16].rpa = &GSLocalMemory::ReadPixel16; - m_psm[PSM_PSMZ16S].rpa = &GSLocalMemory::ReadPixel16; - - m_psm[PSM_PSMCT32].wp = &GSLocalMemory::WritePixel32; - m_psm[PSM_PSMCT24].wp = &GSLocalMemory::WritePixel24; - m_psm[PSM_PSMCT16].wp = &GSLocalMemory::WritePixel16; - m_psm[PSM_PSMCT16S].wp = &GSLocalMemory::WritePixel16S; - m_psm[PSM_PSMT8].wp = &GSLocalMemory::WritePixel8; - m_psm[PSM_PSMT4].wp = &GSLocalMemory::WritePixel4; - m_psm[PSM_PSMT8H].wp = &GSLocalMemory::WritePixel8H; - m_psm[PSM_PSMT4HL].wp = &GSLocalMemory::WritePixel4HL; - m_psm[PSM_PSMT4HH].wp = &GSLocalMemory::WritePixel4HH; - m_psm[PSM_PSMZ32].wp = &GSLocalMemory::WritePixel32Z; - m_psm[PSM_PSMZ24].wp = &GSLocalMemory::WritePixel24Z; - m_psm[PSM_PSMZ16].wp = &GSLocalMemory::WritePixel16Z; - m_psm[PSM_PSMZ16S].wp = &GSLocalMemory::WritePixel16SZ; - - m_psm[PSM_PSMCT32].wpa = 
&GSLocalMemory::WritePixel32; - m_psm[PSM_PSMCT24].wpa = &GSLocalMemory::WritePixel24; - m_psm[PSM_PSMCT16].wpa = &GSLocalMemory::WritePixel16; - m_psm[PSM_PSMCT16S].wpa = &GSLocalMemory::WritePixel16; - m_psm[PSM_PSMT8].wpa = &GSLocalMemory::WritePixel8; - m_psm[PSM_PSMT4].wpa = &GSLocalMemory::WritePixel4; - m_psm[PSM_PSMT8H].wpa = &GSLocalMemory::WritePixel8H; - m_psm[PSM_PSMT4HL].wpa = &GSLocalMemory::WritePixel4HL; - m_psm[PSM_PSMT4HH].wpa = &GSLocalMemory::WritePixel4HH; - m_psm[PSM_PSMZ32].wpa = &GSLocalMemory::WritePixel32; - m_psm[PSM_PSMZ24].wpa = &GSLocalMemory::WritePixel24; - m_psm[PSM_PSMZ16].wpa = &GSLocalMemory::WritePixel16; - m_psm[PSM_PSMZ16S].wpa = &GSLocalMemory::WritePixel16; - - m_psm[PSM_PSMCT24].rt = &GSLocalMemory::ReadTexel24; - m_psm[PSM_PSMCT16].rt = &GSLocalMemory::ReadTexel16; - m_psm[PSM_PSMCT16S].rt = &GSLocalMemory::ReadTexel16S; - m_psm[PSM_PSMT8].rt = &GSLocalMemory::ReadTexel8; - m_psm[PSM_PSMT4].rt = &GSLocalMemory::ReadTexel4; - m_psm[PSM_PSMT8H].rt = &GSLocalMemory::ReadTexel8H; - m_psm[PSM_PSMT4HL].rt = &GSLocalMemory::ReadTexel4HL; - m_psm[PSM_PSMT4HH].rt = &GSLocalMemory::ReadTexel4HH; - m_psm[PSM_PSMZ32].rt = &GSLocalMemory::ReadTexel32Z; - m_psm[PSM_PSMZ24].rt = &GSLocalMemory::ReadTexel24Z; - m_psm[PSM_PSMZ16].rt = &GSLocalMemory::ReadTexel16Z; - m_psm[PSM_PSMZ16S].rt = &GSLocalMemory::ReadTexel16SZ; - - m_psm[PSM_PSMCT24].rta = &GSLocalMemory::ReadTexel24; - m_psm[PSM_PSMCT16].rta = &GSLocalMemory::ReadTexel16; - m_psm[PSM_PSMCT16S].rta = &GSLocalMemory::ReadTexel16; - m_psm[PSM_PSMT8].rta = &GSLocalMemory::ReadTexel8; - m_psm[PSM_PSMT4].rta = &GSLocalMemory::ReadTexel4; - m_psm[PSM_PSMT8H].rta = &GSLocalMemory::ReadTexel8H; - m_psm[PSM_PSMT4HL].rta = &GSLocalMemory::ReadTexel4HL; - m_psm[PSM_PSMT4HH].rta = &GSLocalMemory::ReadTexel4HH; - m_psm[PSM_PSMZ24].rta = &GSLocalMemory::ReadTexel24; - m_psm[PSM_PSMZ16].rta = &GSLocalMemory::ReadTexel16; - m_psm[PSM_PSMZ16S].rta = &GSLocalMemory::ReadTexel16; - - 
m_psm[PSM_PSMCT24].wfa = &GSLocalMemory::WritePixel24; - m_psm[PSM_PSMCT16].wfa = &GSLocalMemory::WriteFrame16; - m_psm[PSM_PSMCT16S].wfa = &GSLocalMemory::WriteFrame16; - m_psm[PSM_PSMZ24].wfa = &GSLocalMemory::WritePixel24; - m_psm[PSM_PSMZ16].wfa = &GSLocalMemory::WriteFrame16; - m_psm[PSM_PSMZ16S].wfa = &GSLocalMemory::WriteFrame16; - - m_psm[PSM_PSMCT24].wi = &GSLocalMemory::WriteImage24; // TODO - m_psm[PSM_PSMCT16].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMCT16S].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMT8].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMT4].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMT8H].wi = &GSLocalMemory::WriteImage8H; // TODO - m_psm[PSM_PSMT4HL].wi = &GSLocalMemory::WriteImage4HL; // TODO - m_psm[PSM_PSMT4HH].wi = &GSLocalMemory::WriteImage4HH; // TODO - m_psm[PSM_PSMZ32].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMZ24].wi = &GSLocalMemory::WriteImage24Z; // TODO - m_psm[PSM_PSMZ16].wi = &GSLocalMemory::WriteImage; - m_psm[PSM_PSMZ16S].wi = &GSLocalMemory::WriteImage; - - m_psm[PSM_PSMCT24].rtx = &GSLocalMemory::ReadTexture24; - m_psm[PSM_PSMCT16].rtx = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMCT16S].rtx = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMT8].rtx = &GSLocalMemory::ReadTexture8; - m_psm[PSM_PSMT4].rtx = &GSLocalMemory::ReadTexture4; - m_psm[PSM_PSMT8H].rtx = &GSLocalMemory::ReadTexture8H; - m_psm[PSM_PSMT4HL].rtx = &GSLocalMemory::ReadTexture4HL; - m_psm[PSM_PSMT4HH].rtx = &GSLocalMemory::ReadTexture4HH; - m_psm[PSM_PSMZ32].rtx = &GSLocalMemory::ReadTexture32; - m_psm[PSM_PSMZ24].rtx = &GSLocalMemory::ReadTexture24; - m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16; - - m_psm[PSM_PSMCT24].rtxP = &GSLocalMemory::ReadTexture24; - m_psm[PSM_PSMCT16].rtxP = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P; - m_psm[PSM_PSMT4].rtxP = 
&GSLocalMemory::ReadTexture4P; - m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP; - m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP; - m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP; - m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32; - m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24; - m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16; - m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16; - - m_psm[PSM_PSMCT24].rtxb = &GSLocalMemory::ReadTextureBlock24; - m_psm[PSM_PSMCT16].rtxb = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMT8].rtxb = &GSLocalMemory::ReadTextureBlock8; - m_psm[PSM_PSMT4].rtxb = &GSLocalMemory::ReadTextureBlock4; - m_psm[PSM_PSMT8H].rtxb = &GSLocalMemory::ReadTextureBlock8H; - m_psm[PSM_PSMT4HL].rtxb = &GSLocalMemory::ReadTextureBlock4HL; - m_psm[PSM_PSMT4HH].rtxb = &GSLocalMemory::ReadTextureBlock4HH; - m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32; - m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24; - m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16; - - m_psm[PSM_PSMCT24].rtxbP = &GSLocalMemory::ReadTextureBlock24; - m_psm[PSM_PSMCT16].rtxbP = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMT8].rtxbP = &GSLocalMemory::ReadTextureBlock8P; - m_psm[PSM_PSMT4].rtxbP = &GSLocalMemory::ReadTextureBlock4P; - m_psm[PSM_PSMT8H].rtxbP = &GSLocalMemory::ReadTextureBlock8HP; - m_psm[PSM_PSMT4HL].rtxbP = &GSLocalMemory::ReadTextureBlock4HLP; - m_psm[PSM_PSMT4HH].rtxbP = &GSLocalMemory::ReadTextureBlock4HHP; - m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32; - m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24; - m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16; - m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16; - - 
m_psm[PSM_PSMCT16].bpp = m_psm[PSM_PSMCT16S].bpp = 16; - m_psm[PSM_PSMT8].bpp = 8; - m_psm[PSM_PSMT4].bpp = 4; - m_psm[PSM_PSMZ16].bpp = m_psm[PSM_PSMZ16S].bpp = 16; - - m_psm[PSM_PSMCT24].trbpp = 24; - m_psm[PSM_PSMCT16].trbpp = m_psm[PSM_PSMCT16S].trbpp = 16; - m_psm[PSM_PSMT8].trbpp = m_psm[PSM_PSMT8H].trbpp = 8; - m_psm[PSM_PSMT4].trbpp = m_psm[PSM_PSMT4HL].trbpp = m_psm[PSM_PSMT4HH].trbpp = 4; - m_psm[PSM_PSMZ24].trbpp = 24; - m_psm[PSM_PSMZ16].trbpp = m_psm[PSM_PSMZ16S].trbpp = 16; - - m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256; - m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16; - - for(size_t i = 0; i < countof(m_psm); i++) m_psm[i].fmt = 3; - m_psm[PSM_PSMCT32].fmt = m_psm[PSM_PSMZ32].fmt = 0; - m_psm[PSM_PSMCT24].fmt = m_psm[PSM_PSMZ24].fmt = 1; - m_psm[PSM_PSMCT16].fmt = m_psm[PSM_PSMZ16].fmt = 2; - m_psm[PSM_PSMCT16S].fmt = m_psm[PSM_PSMZ16S].fmt = 2; - - m_psm[PSM_PSMCT16].bs = m_psm[PSM_PSMCT16S].bs = GSVector2i(16, 8); - m_psm[PSM_PSMT8].bs = GSVector2i(16, 16); - m_psm[PSM_PSMT4].bs = GSVector2i(32, 16); - m_psm[PSM_PSMZ16].bs = m_psm[PSM_PSMZ16S].bs = GSVector2i(16, 8); - - m_psm[PSM_PSMCT16].pgs = m_psm[PSM_PSMCT16S].pgs = GSVector2i(64, 64); - m_psm[PSM_PSMT8].pgs = GSVector2i(128, 64); - m_psm[PSM_PSMT4].pgs = GSVector2i(128, 128); - m_psm[PSM_PSMZ16].pgs = m_psm[PSM_PSMZ16S].pgs = GSVector2i(64, 64); - - for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16].rowOffset[i] = rowOffset16; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16S].rowOffset[i] = rowOffset16S; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMT8].rowOffset[i] = rowOffset8[((i + 2) >> 2) & 1]; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMT4].rowOffset[i] = rowOffset4[((i + 2) >> 2) & 1]; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ32].rowOffset[i] = rowOffset32Z; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ24].rowOffset[i] = rowOffset32Z; - for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16].rowOffset[i] = rowOffset16Z; - for(int i = 0; i < 8; i++) 
m_psm[PSM_PSMZ16S].rowOffset[i] = rowOffset16SZ; - - m_psm[PSM_PSMCT16].blockOffset = blockOffset16; - m_psm[PSM_PSMCT16S].blockOffset = blockOffset16S; - m_psm[PSM_PSMT8].blockOffset = blockOffset8; - m_psm[PSM_PSMT4].blockOffset = blockOffset4; - m_psm[PSM_PSMZ32].blockOffset = blockOffset32Z; - m_psm[PSM_PSMZ24].blockOffset = blockOffset32Z; - m_psm[PSM_PSMZ16].blockOffset = blockOffset16Z; - m_psm[PSM_PSMZ16S].blockOffset = blockOffset16SZ; - - m_psm[PSM_PSMCT24].msk = 0x3f; - m_psm[PSM_PSMZ24].msk = 0x3f; - m_psm[PSM_PSMT8H].msk = 0xc0; - m_psm[PSM_PSMT4HL].msk = 0x40; - m_psm[PSM_PSMT4HH].msk = 0x80; -} - -GSLocalMemory::~GSLocalMemory() -{ - vmfree(m_vm8, m_vmsize * 2); - - for_each(m_omap.begin(), m_omap.end(), aligned_free_second()); - for_each(m_pomap.begin(), m_pomap.end(), aligned_free_second()); - for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second()); - - for(hash_map*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++) - { - delete [] i->second; - } -} - -GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) -{ - uint32 hash = bp | (bw << 14) | (psm << 20); - - hash_map::iterator i = m_omap.find(hash); - - if(i != m_omap.end()) - { - return i->second; - } - - GSOffset* off = new GSOffset(bp, bw, psm); - - m_omap[hash] = off; - - return off; -} - -GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) -{ - uint32 fbp = FRAME.Block(); - uint32 zbp = ZBUF.Block(); - uint32 fpsm = FRAME.PSM; - uint32 zpsm = ZBUF.PSM; - uint32 bw = FRAME.FBW; - - ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8); - - // "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only) - - uint32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2); - uint32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2); - - uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28); - - hash_map::iterator i = m_pomap.find(hash); - - 
if(i != m_pomap.end()) - { - return i->second; - } - - GSPixelOffset* off = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), 32); - - off->hash = hash; - off->fbp = fbp; - off->zbp = zbp; - off->fpsm = fpsm; - off->zpsm = zpsm; - off->bw = bw; - - pixelAddress fpa = m_psm[fpsm].pa; - pixelAddress zpa = m_psm[zpsm].pa; - - int fs = m_psm[fpsm].bpp >> 5; - int zs = m_psm[zpsm].bpp >> 5; - - for(int i = 0; i < 2048; i++) - { - off->row[i].x = (int)fpa(0, i, fbp, bw) << fs; - off->row[i].y = (int)zpa(0, i, zbp, bw) << zs; - } - - for(int i = 0; i < 2048; i++) - { - off->col[i].x = m_psm[fpsm].rowOffset[0][i] << fs; - off->col[i].y = m_psm[zpsm].rowOffset[0][i] << zs; - } - - m_pomap[hash] = off; - - return off; -} - -GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) -{ - uint32 fbp = FRAME.Block(); - uint32 zbp = ZBUF.Block(); - uint32 fpsm = FRAME.PSM; - uint32 zpsm = ZBUF.PSM; - uint32 bw = FRAME.FBW; - - ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8); - - // "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only) - - uint32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2); - uint32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2); - - uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28); - - hash_map::iterator i = m_po4map.find(hash); - - if(i != m_po4map.end()) - { - return i->second; - } - - GSPixelOffset4* off = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32); - - off->hash = hash; - off->fbp = fbp; - off->zbp = zbp; - off->fpsm = fpsm; - off->zpsm = zpsm; - off->bw = bw; - - pixelAddress fpa = m_psm[fpsm].pa; - pixelAddress zpa = m_psm[zpsm].pa; - - int fs = m_psm[fpsm].bpp >> 5; - int zs = m_psm[zpsm].bpp >> 5; - - for(int i = 0; i < 2048; i++) - { - off->row[i].x = (int)fpa(0, i, fbp, bw) << fs; - off->row[i].y = (int)zpa(0, i, zbp, bw) << zs; - } - - for(int i = 0; i < 512; i++) - { - 
off->col[i].x = m_psm[fpsm].rowOffset[0][i * 4] << fs; - off->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs; - } - - m_po4map[hash] = off; - - return off; -} - -static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;} - -vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) -{ - uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH - - hash_map*>::iterator i = m_p2tmap.find(hash); - - if(i != m_p2tmap.end()) - { - return i->second; - } - - GSVector2i bs = m_psm[TEX0.PSM].bs; - - int tw = std::max(1 << TEX0.TW, bs.x); - int th = std::max(1 << TEX0.TH, bs.y); - - const GSOffset* off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - hash_map > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks) - - for(int y = 0; y < th; y += bs.y) - { - uint32 base = off->block.row[y >> 3]; - - for(int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x) - { - uint32 page = (base + off->block.col[x >> 3]) >> 5; - - if(page < MAX_PAGES) - { - tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3 - } - } - } - - // combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array - - vector* p2t = new vector[MAX_PAGES]; - - for(hash_map >::iterator i = tmp.begin(); i != tmp.end(); i++) - { - uint32 page = i->first; - - hash_set& tiles = i->second; - - hash_map m; - - for(hash_set::iterator j = tiles.begin(); j != tiles.end(); j++) - { - uint32 addr = *j; - - uint32 row = addr >> 5; - uint32 col = 1 << (addr & 31); - - hash_map::iterator k = m.find(row); - - if(k != m.end()) - { - k->second |= col; - } - else - { - m[row] = col; - } - } - - // sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y) - - for(hash_map::iterator j = m.begin(); j != m.end(); j++) - { - p2t[page].push_back(GSVector2i(j->first, ~j->second)); - } - - std::sort(p2t[page].begin(), p2t[page].end(), cmp_vec2x); - } - - 
m_p2tmap[hash] = p2t; - - return p2t; -} - -//////////////////// - -template -void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) -{ - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - const int csy = bsy / 4; - - for(int offset = srcpitch * csy; h >= csy; h -= csy, y += csy, src += offset) - { - for(int x = l; x < r; x += bsx) - { - switch(psm) - { - case PSM_PSMCT32: GSBlock::WriteColumn32(y, BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMCT16: GSBlock::WriteColumn16(y, BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMCT16S: GSBlock::WriteColumn16(y, BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMT8: GSBlock::WriteColumn8(y, BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; - case PSM_PSMT4: GSBlock::WriteColumn4(y, BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; - case PSM_PSMZ32: GSBlock::WriteColumn32(y, BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMZ16: GSBlock::WriteColumn16(y, BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMZ16S: GSBlock::WriteColumn16(y, BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; - // TODO - default: __assume(0); - } - } - } -} - -template -void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) -{ - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - for(int offset = srcpitch * bsy; h >= bsy; h -= bsy, y += bsy, src += offset) - { - for(int x = l; x < r; x += bsx) - { - switch(psm) - { - case PSM_PSMCT32: GSBlock::WriteBlock32(BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMCT16: GSBlock::WriteBlock16(BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMCT16S: GSBlock::WriteBlock16(BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMT8: 
GSBlock::WriteBlock8(BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; - case PSM_PSMT4: GSBlock::WriteBlock4(BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; - case PSM_PSMZ32: GSBlock::WriteBlock32(BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMZ16: GSBlock::WriteBlock16(BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMZ16S: GSBlock::WriteBlock16(BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; - // TODO - default: __assume(0); - } - } - } -} - -template -void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) -{ - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - for(; h > 0; y++, h--, src += srcpitch) - { - for(int x = l; x < r; x++) - { - switch(psm) - { - case PSM_PSMCT32: WritePixel32(x, y, *(uint32*)&src[x * 4], bp, bw); break; - case PSM_PSMCT16: WritePixel16(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMCT16S: WritePixel16S(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMT8: WritePixel8(x, y, src[x], bp, bw); break; - case PSM_PSMT4: WritePixel4(x, y, src[x >> 1] >> ((x & 1) << 2), bp, bw); break; - case PSM_PSMZ32: WritePixel32Z(x, y, *(uint32*)&src[x * 4], bp, bw); break; - case PSM_PSMZ16: WritePixel16Z(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMZ16S: WritePixel16SZ(x, y, *(uint16*)&src[x * 2], bp, bw); break; - // TODO - default: __assume(0); - } - } - } -} - -template -void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) -{ - __aligned(uint8, 32) buff[64]; // merge buffer for one column - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - const int csy = bsy / 4; - - // merge incomplete column - - int y2 = y & (csy - 1); - - if(y2 > 0) - { - int h2 = min(h, csy - y2); - - for(int x = l; x < r; x += bsx) - { - uint8* dst = NULL; - - switch(psm) - { - case PSM_PSMCT32: 
dst = BlockPtr32(x, y, bp, bw); break; - case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break; - case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break; - case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break; - case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break; - case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break; - case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break; - case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break; - // TODO - default: __assume(0); - } - - switch(psm) - { - case PSM_PSMCT32: - case PSM_PSMZ32: - GSBlock::ReadColumn32(y, dst, buff, 32); - memcpy(&buff[32], &src[x * 4], 32); - GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - GSBlock::ReadColumn16(y, dst, buff, 32); - memcpy(&buff[32], &src[x * 2], 32); - GSBlock::WriteColumn16<32>(y, dst, buff, 32); - break; - case PSM_PSMT8: - GSBlock::ReadColumn8(y, dst, buff, 16); - for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + x], 16); - GSBlock::WriteColumn8<32>(y, dst, buff, 16); - break; - case PSM_PSMT4: - GSBlock::ReadColumn4(y, dst, buff, 16); - for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16); - GSBlock::WriteColumn4<32>(y, dst, buff, 16); - break; - // TODO - default: - __assume(0); - } - } - - src += srcpitch * h2; - y += h2; - h -= h2; - } - - // write whole columns - - { - int h2 = h & ~(csy - 1); - - if(h2 > 0) - { - size_t addr = (size_t)&src[l * trbpp >> 3]; - - if((addr & 31) == 0 && (srcpitch & 31) == 0) - { - WriteImageColumn(l, r, y, h2, src, srcpitch, BITBLTBUF); - } - else if((addr & 15) == 0 && (srcpitch & 15) == 0) - { - WriteImageColumn(l, r, y, h2, src, srcpitch, BITBLTBUF); - } - else - { - WriteImageColumn(l, r, y, h2, src, srcpitch, BITBLTBUF); - } - - src += srcpitch * h2; - y += h2; - h -= h2; - } - } - - // merge incomplete column - - if(h >= 1) - { - for(int x = l; x < 
r; x += bsx) - { - uint8* dst = NULL; - - switch(psm) - { - case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break; - case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break; - case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break; - case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break; - case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break; - case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break; - case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break; - case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break; - // TODO - default: __assume(0); - } - - switch(psm) - { - case PSM_PSMCT32: - case PSM_PSMZ32: - GSBlock::ReadColumn32(y, dst, buff, 32); - memcpy(&buff[0], &src[x * 4], 32); - GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - GSBlock::ReadColumn16(y, dst, buff, 32); - memcpy(&buff[0], &src[x * 2], 32); - GSBlock::WriteColumn16<32>(y, dst, buff, 32); - break; - case PSM_PSMT8: - GSBlock::ReadColumn8(y, dst, buff, 16); - for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + x], 16); - GSBlock::WriteColumn8<32>(y, dst, buff, 16); - break; - case PSM_PSMT4: - GSBlock::ReadColumn4(y, dst, buff, 16); - for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16); - GSBlock::WriteColumn4<32>(y, dst, buff, 16); - break; - // TODO - default: - __assume(0); - } - } - } -} - -template -void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - int l = (int)TRXPOS.DSAX; - int r = l + (int)TRXREG.RRW; - - // finish the incomplete row first - - if(tx != l) - { - int n = min(len, (r - tx) * trbpp >> 3); - WriteImageX(tx, ty, src, n, BITBLTBUF, TRXPOS, TRXREG); - src += n; - len -= n; - } - - int la = (l + (bsx - 1)) & ~(bsx - 1); - int ra = r & ~(bsx - 1); - int srcpitch = (r - l) * trbpp >> 3; - int 
h = len / srcpitch; - - if(ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row - { - const uint8* s = &src[-l * trbpp >> 3]; - - src += srcpitch * h; - len -= srcpitch * h; - - // left part - - if(l < la) - { - WriteImageLeftRight(l, la, ty, h, s, srcpitch, BITBLTBUF); - } - - // right part - - if(ra < r) - { - WriteImageLeftRight(ra, r, ty, h, s, srcpitch, BITBLTBUF); - } - - // horizontally aligned part - - if(la < ra) - { - // top part - - { - int h2 = min(h, bsy - (ty & (bsy - 1))); - - if(h2 < bsy) - { - WriteImageTopBottom(la, ra, ty, h2, s, srcpitch, BITBLTBUF); - - s += srcpitch * h2; - ty += h2; - h -= h2; - } - } - - // horizontally and vertically aligned part - - { - int h2 = h & ~(bsy - 1); - - if(h2 > 0) - { - size_t addr = (size_t)&s[la * trbpp >> 3]; - - if((addr & 31) == 0 && (srcpitch & 31) == 0) - { - WriteImageBlock(la, ra, ty, h2, s, srcpitch, BITBLTBUF); - } - else if((addr & 15) == 0 && (srcpitch & 15) == 0) - { - WriteImageBlock(la, ra, ty, h2, s, srcpitch, BITBLTBUF); - } - else - { - WriteImageBlock(la, ra, ty, h2, s, srcpitch, BITBLTBUF); - } - - s += srcpitch * h2; - ty += h2; - h -= h2; - } - } - - // bottom part - - if(h > 0) - { - WriteImageTopBottom(la, ra, ty, h, s, srcpitch, BITBLTBUF); - - // s += srcpitch * h; - ty += h; - // h -= h; - } - } - } - - // the rest - - if(len > 0) - { - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } -} - - -#define IsTopLeftAligned(dsax, tx, ty, bw, bh) \ - ((((int)dsax) & ((bw)-1)) == 0 && ((tx) & ((bw)-1)) == 0 && ((int)dsax) == (tx) && ((ty) & ((bh)-1)) == 0) - -void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3; - int th = len / srcpitch; - - bool aligned = IsTopLeftAligned(TRXPOS.DSAX, 
tx, ty, 8, 8); - - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) - { - // TODO - - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } - else - { - th += ty; - - for(int y = ty; y < th; y += 8, src += srcpitch * 8) - { - for(int x = tx; x < tw; x += 8) - { - GSBlock::UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32(x, y, bp, bw)); - } - } - - ty = th; - } -} - -void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW; - int th = len / srcpitch; - - bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) - { - // TODO - - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } - else - { - th += ty; - - for(int y = ty; y < th; y += 8, src += srcpitch * 8) - { - for(int x = tx; x < tw; x += 8) - { - GSBlock::UnpackAndWriteBlock8H(src + (x - tx), srcpitch, BlockPtr32(x, y, bp, bw)); - } - } - - ty = th; - } -} - -void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2; - int th = len / srcpitch; - - bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) - { - // TODO - - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } - else - { - th += ty; - - for(int y = ty; y < th; y += 8, src += srcpitch * 8) - { - for(int x = tx; x < tw; x += 8) - { - GSBlock::UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw)); - } - } - - ty = th; - } -} - -void GSLocalMemory::WriteImage4HH(int& 
tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2; - int th = len / srcpitch; - - bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) - { - // TODO - - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } - else - { - th += ty; - - for(int y = ty; y < th; y += 8, src += srcpitch * 8) - { - for(int x = tx; x < tw; x += 8) - { - GSBlock::UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw)); - } - } - - ty = th; - } -} - -void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(TRXREG.RRW == 0) return; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - - int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3; - int th = len / srcpitch; - - bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) - { - // TODO - - WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); - } - else - { - th += ty; - - for(int y = ty; y < th; y += 8, src += srcpitch * 8) - { - for(int x = tx; x < tw; x += 8) - { - GSBlock::UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32Z(x, y, bp, bw)); - } - } - - ty = th; - } -} - -void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) -{ - if(len <= 0) return; - - const uint8* pb = (uint8*)src; - const uint16* pw = (uint16*)src; - const uint32* pd = (uint32*)src; - - uint32 bp = BITBLTBUF.DBP; - uint32 bw = BITBLTBUF.DBW; - psm_t* psm = &m_psm[BITBLTBUF.DPSM]; - - int x = tx; - int y = ty; - int sx = (int)TRXPOS.DSAX; - int ex = sx + 
(int)TRXREG.RRW; - - switch(BITBLTBUF.DPSM) - { - case PSM_PSMCT32: - case PSM_PSMZ32: - - len /= 4; - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pd++) - { - WritePixel32(addr + offset[x], *pd); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMCT24: - case PSM_PSMZ24: - - len /= 3; - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb += 3) - { - WritePixel24(addr + offset[x], *(uint32*)pb); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - - len /= 2; - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pw++) - { - WritePixel16(addr + offset[x], *pw); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT8: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb++) - { - WritePixel8(addr + offset[x], *pb); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - WritePixel4(addr + offset[x + 0], *pb & 0xf); - WritePixel4(addr + offset[x + 1], *pb >> 4); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT8H: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb++) - { - WritePixel8H(addr + offset[x], *pb); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HL: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, 
pb++) - { - WritePixel4HL(addr + offset[x + 0], *pb & 0xf); - WritePixel4HL(addr + offset[x + 1], *pb >> 4); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HH: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - WritePixel4HH(addr + offset[x + 0], *pb & 0xf); - WritePixel4HH(addr + offset[x + 1], *pb >> 4); - } - - if(x >= ex) {x = sx; y++;} - } - - break; - } - - tx = x; - ty = y; -} - -// - -void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const -{ - if(len <= 0) return; - - uint8* RESTRICT pb = (uint8*)dst; - uint16* RESTRICT pw = (uint16*)dst; - uint32* RESTRICT pd = (uint32*)dst; - - uint32 bp = BITBLTBUF.SBP; - uint32 bw = BITBLTBUF.SBW; - psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM]; - - int x = tx; - int y = ty; - int sx = (int)TRXPOS.SSAX; - int ex = sx + (int)TRXREG.RRW; - - // printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len); - - switch(BITBLTBUF.SPSM) - { - case PSM_PSMCT32: - case PSM_PSMZ32: - - // MGS1 intro, fade effect between two scenes (airplane outside-inside transition) - - len /= 4; - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex && (x & 7); len--, x++, pd++) - { - *pd = ps[offset[x]]; - } - - // aligned to a column - - for(int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8) - { - int off = offset[x]; - - GSVector4i::store(&pd[0], GSVector4i::load(&ps[off + 0], &ps[off + 4])); - GSVector4i::store(&pd[4], GSVector4i::load(&ps[off + 8], &ps[off + 12])); - - for(int i = 0; i < 8; i++) ASSERT(pd[i] == ps[offset[x + i]]); - } - - for(; len > 0 && x < ex; len--, x++, pd++) - { - *pd = ps[offset[x]]; - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMCT24: - case PSM_PSMZ24: - 
- len /= 3; - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x++, pb += 3) - { - uint32 c = ps[offset[x]]; - - pb[0] = (uint8)(c); - pb[1] = (uint8)(c >> 8); - pb[2] = (uint8)(c >> 16); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - - len /= 2; - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4) - { - pw[0] = ps[offset[x + 0]]; - pw[1] = ps[offset[x + 1]]; - pw[2] = ps[offset[x + 2]]; - pw[3] = ps[offset[x + 3]]; - } - - for(; len > 0 && x < ex; len--, x++, pw++) - { - *pw = ps[offset[x]]; - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT8: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) - { - pb[0] = ps[offset[x + 0]]; - pb[1] = ps[offset[x + 1]]; - pb[2] = ps[offset[x + 2]]; - pb[3] = ps[offset[x + 3]]; - } - - for(; len > 0 && x < ex; len--, x++, pb++) - { - *pb = ps[offset[x]]; - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* RESTRICT offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - *pb = (uint8)(ReadPixel4(addr + offset[x + 0]) | (ReadPixel4(addr + offset[x + 1]) << 4)); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT8H: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) - { - pb[0] = (uint8)(ps[offset[x + 0]] >> 24); - pb[1] = (uint8)(ps[offset[x + 1]] 
>> 24); - pb[2] = (uint8)(ps[offset[x + 2]] >> 24); - pb[3] = (uint8)(ps[offset[x + 3]] >> 24); - } - - for(; len > 0 && x < ex; len--, x++, pb++) - { - *pb = (uint8)(ps[offset[x]] >> 24); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HL: - - while(len > 0) - { - int* offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f; - uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0; - - *pb = (uint8)(c0 | c1); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HH: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f; - uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0; - - *pb = (uint8)(c0 | c1); - } - - if(x == ex) {x = sx; y++;} - } - - break; - } - - tx = x; - ty = y; -} - -/////////////////// - -void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadBlock32(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - if(TEXA.AEM) - { - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadAndExpandBlock24(src, dst, dstpitch, TEXA); - } - FOREACH_BLOCK_END - } - else - { - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadAndExpandBlock24(src, dst, dstpitch, TEXA); - } - FOREACH_BLOCK_END - } -} - -void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - if(TEXA.AEM) - { - FOREACH_BLOCK_START(r, 16, 8, 32) - { - GSBlock::ReadAndExpandBlock16(src, dst, dstpitch, TEXA); - } - FOREACH_BLOCK_END - } 
- else - { - FOREACH_BLOCK_START(r, 16, 8, 32) - { - GSBlock::ReadAndExpandBlock16(src, dst, dstpitch, TEXA); - } - FOREACH_BLOCK_END - } -} - -void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const uint32* pal = m_clut; - - FOREACH_BLOCK_START(r, 16, 16, 32) - { - GSBlock::ReadAndExpandBlock8_32(src, dst, dstpitch, pal); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const uint64* pal = m_clut; - - FOREACH_BLOCK_START(r, 32, 16, 32) - { - GSBlock::ReadAndExpandBlock4_32(src, dst, dstpitch, pal); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const uint32* pal = m_clut; - - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadAndExpandBlock8H_32(src, dst, dstpitch, pal); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const uint32* pal = m_clut; - - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const uint32* pal = m_clut; - - FOREACH_BLOCK_START(r, 8, 8, 32) - { - GSBlock::ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal); - } - FOREACH_BLOCK_END -} - -/////////////////// - -void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadBlock32(BlockPtr(bp), dst, dstpitch); -} - -void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - 
if(TEXA.AEM) - { - GSBlock::ReadAndExpandBlock24(BlockPtr(bp), dst, dstpitch, TEXA); - } - else - { - GSBlock::ReadAndExpandBlock24(BlockPtr(bp), dst, dstpitch, TEXA); - } -} - -void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - if(TEXA.AEM) - { - GSBlock::ReadAndExpandBlock16(BlockPtr(bp), dst, dstpitch, TEXA); - } - else - { - GSBlock::ReadAndExpandBlock16(BlockPtr(bp), dst, dstpitch, TEXA); - } -} - -void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut); -} - -void GSLocalMemory::ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut); -} - -void GSLocalMemory::ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut); -} - -void GSLocalMemory::ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut); -} - -void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut); -} - -/////////////////// - -void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - const psm_t& psm = m_psm[off->psm]; - - readTexel rt = psm.rt; - readTexture rtx = psm.rtx; - - if(r.width() < psm.bs.x || r.height() < psm.bs.y - || (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1)) - || (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1))) - { - GIFRegTEX0 TEX0; - - TEX0.TBP0 
= off->bp; - TEX0.TBW = off->bw; - TEX0.PSM = off->psm; - - GSVector4i cr = r.ralign(psm.bs); - - bool aligned = ((size_t)(dst + (cr.left - r.left) * sizeof(uint32)) & 0xf) == 0; - - if(cr.rempty() || !aligned) - { - // TODO: expand r to block size, read into temp buffer - - if(!aligned) printf("unaligned memory pointer passed to ReadTexture\n"); - - for(int y = r.top; y < r.bottom; y++, dst += dstpitch) - { - for(int x = r.left, i = 0; x < r.right; x++, i++) - { - ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); - } - } - } - else - { - for(int y = r.top; y < cr.top; y++, dst += dstpitch) - { - for(int x = r.left, i = 0; x < r.right; x++, i++) - { - ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); - } - } - - for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch) - { - for(int x = r.left, i = 0; x < r.right; x++, i++) - { - ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); - } - } - - for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch) - { - for(int x = r.left, i = 0; x < cr.left; x++, i++) - { - ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); - } - - for(int x = cr.right, i = x - r.left; x < r.right; x++, i++) - { - ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); - } - } - - if(!cr.rempty()) - { - (this->*rtx)(off, cr, dst + (cr.left - r.left) * sizeof(uint32), dstpitch, TEXA); - } - } - } - else - { - (this->*rtx)(off, r, dst, dstpitch, TEXA); - } -} - -// 32/8 - -void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 16, 16, 8) - { - GSBlock::ReadBlock8(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 32, 16, 8) - { - GSBlock::ReadBlock4P(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, 
uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 8, 8, 8) - { - GSBlock::ReadBlock8HP(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 8, 8, 8) - { - GSBlock::ReadBlock4HLP(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) -{ - FOREACH_BLOCK_START(r, 8, 8, 8) - { - GSBlock::ReadBlock4HHP(src, dst, dstpitch); - } - FOREACH_BLOCK_END -} - -// - -void GSLocalMemory::ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - GSBlock::ReadBlock8(BlockPtr(bp), dst, dstpitch); -} - -void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadBlock4P(BlockPtr(bp), dst, dstpitch); -} - -void GSLocalMemory::ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadBlock8HP(BlockPtr(bp), dst, dstpitch); -} - -void GSLocalMemory::ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadBlock4HLP(BlockPtr(bp), dst, dstpitch); -} - -void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const -{ - ALIGN_STACK(32); - - GSBlock::ReadBlock4HHP(BlockPtr(bp), dst, dstpitch); -} - -// - -#include "GSTextureSW.h" - -void GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h) -{ - int pitch = w * 4; - int size = pitch * h; - void* bits = _aligned_malloc(size, 32); - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = bp; - TEX0.TBW = bw; - TEX0.PSM = psm; - - readPixel rp = m_psm[psm].rp; - - uint8* p = (uint8*)bits; - - for(int j = 0; j < h; 
j++, p += pitch) - { - for(int i = 0; i < w; i++) - { - ((uint32*)p)[i] = (this->*rp)(i, j, TEX0.TBP0, TEX0.TBW); - } - } - - GSTextureSW t(GSTexture::Offscreen, w, h); - - if(t.Update(GSVector4i(0, 0, w, h), bits, pitch)) - { - t.Save(fn); - } - - _aligned_free(bits); -} - -// GSOffset - -GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) -{ - hash = _bp | (_bw << 14) | (_psm << 20); - - GSLocalMemory::pixelAddress bn = GSLocalMemory::m_psm[_psm].bn; - - for(int i = 0; i < 256; i++) - { - block.row[i] = (short)bn(0, i << 3, _bp, _bw); - } - - block.col = GSLocalMemory::m_psm[_psm].blockOffset; - - GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[_psm].pa; - - for(int i = 0; i < 4096; i++) - { - pixel.row[i] = (int)pa(0, i & 0x7ff, _bp, _bw); - } - - for(int i = 0; i < 8; i++) - { - pixel.col[i] = GSLocalMemory::m_psm[_psm].rowOffset[i]; - } -} - -GSOffset::~GSOffset() -{ -} - -uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox) -{ - GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; - - GSVector4i r = rect.ralign(bs); - - if(bbox != NULL) *bbox = r; - - // worst case: - // bp page-aligned: (w * h) / (64 * 32) - // bp block-aligned: (w * h) / (8 * 8) - - int size = r.width() * r.height(); - - int limit = MAX_PAGES + 1; - - if(pages == NULL) - { - limit = std::min((size >> ((bp & 31) != 0 ? 
6 : 11)) + 2, MAX_PAGES) + 1; - - pages = new uint32[limit]; - } - - __aligned(uint32, 16) tmp[16]; - - ((GSVector4i*)tmp)[0] = GSVector4i::zero(); - ((GSVector4i*)tmp)[1] = GSVector4i::zero(); - ((GSVector4i*)tmp)[2] = GSVector4i::zero(); - ((GSVector4i*)tmp)[3] = GSVector4i::zero(); - - r = r.sra32(3); - - bs.x >>= 3; - bs.y >>= 3; - - uint32* RESTRICT p = pages; - - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = block.row[y]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 n = (base + block.col[x]) >> 5; - - if(n < MAX_PAGES) - { - uint32& row = tmp[n >> 5]; - uint32 col = 1 << (n & 31); - - if((row & col) == 0) - { - row |= col; - - *p++ = n; - } - } - } - } - - *p++ = (uint32)EOP; - - ASSERT(p - pages <= limit); - - return pages; -} - -GSVector4i* GSOffset::GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages, GSVector4i* bbox) -{ - if(pages == NULL) - { - pages = (GSVector4i*)_aligned_malloc(sizeof(GSVector4i) * 4, 16); - } - - pages[0] = GSVector4i::zero(); - pages[1] = GSVector4i::zero(); - pages[2] = GSVector4i::zero(); - pages[3] = GSVector4i::zero(); - - GSVector2i bs = (bp & 31) == 0 ? 
GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; - - GSVector4i r = rect.ralign(bs); - - if(bbox != NULL) *bbox = r; - - r = r.sra32(3); - - bs.x >>= 3; - bs.y >>= 3; - - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = block.row[y]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 n = (base + block.col[x]) >> 5; - - if(n < MAX_PAGES) - { - ((uint32*)pages)[n >> 5] |= 1 << (n & 31); - } - } - } - - return pages; - -} \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSLocalMemory.h b/plugins/GSdx_legacy/GSLocalMemory.h deleted file mode 100644 index 3b24eb0b1e..0000000000 --- a/plugins/GSdx_legacy/GSLocalMemory.h +++ /dev/null @@ -1,918 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSTables.h" -#include "GSVector.h" -#include "GSBlock.h" -#include "GSClut.h" - -class GSOffset : public GSAlignedClass<32> -{ -public: - __aligned(struct, 32) Block - { - short row[256]; // yn (n = 0 8 16 ...) - short* col; // blockOffset* - }; - - __aligned(struct, 32) Pixel - { - int row[4096]; // yn (n = 0 1 2 ...) 
NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing) - int* col[8]; // rowOffset* - }; - - union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};}; - - Block block; - Pixel pixel; - - GSOffset(uint32 bp, uint32 bw, uint32 psm); - virtual ~GSOffset(); - - enum {EOP = 0xffffffff}; - - uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL); - GSVector4i* GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages = NULL, GSVector4i* bbox = NULL); // free returned value with _aligned_free -}; - -struct GSPixelOffset -{ - // 16 bit offsets (m_vm16[...]) - - GSVector2i row[2048]; // f yn | z yn - GSVector2i col[2048]; // f xn | z xn - uint32 hash; - uint32 fbp, zbp, fpsm, zpsm, bw; -}; - -struct GSPixelOffset4 -{ - // 16 bit offsets (m_vm16[...]) - - GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...) - GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...) - uint32 hash; - uint32 fbp, zbp, fpsm, zpsm, bw; -}; - -class GSLocalMemory : public GSAlignedClass<32> -{ -public: - typedef uint32 (*pixelAddress)(int x, int y, uint32 bp, uint32 bw); - typedef void (GSLocalMemory::*writePixel)(int x, int y, uint32 c, uint32 bp, uint32 bw); - typedef void (GSLocalMemory::*writeFrame)(int x, int y, uint32 c, uint32 bp, uint32 bw); - typedef uint32 (GSLocalMemory::*readPixel)(int x, int y, uint32 bp, uint32 bw) const; - typedef uint32 (GSLocalMemory::*readTexel)(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - typedef void (GSLocalMemory::*writePixelAddr)(uint32 addr, uint32 c); - typedef void (GSLocalMemory::*writeFrameAddr)(uint32 addr, uint32 c); - typedef uint32 (GSLocalMemory::*readPixelAddr)(uint32 addr) const; - typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const; - typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - typedef 
void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const; - typedef void (GSLocalMemory::*readTexture)(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - - __aligned(struct, 128) psm_t - { - pixelAddress pa, bn; - readPixel rp; - readPixelAddr rpa; - writePixel wp; - writePixelAddr wpa; - readTexel rt; - readTexelAddr rta; - writeFrameAddr wfa; - writeImage wi; - readImage ri; - readTexture rtx, rtxP; - readTextureBlock rtxb, rtxbP; - uint16 bpp, trbpp, pal, fmt; - GSVector2i bs, pgs; - int* rowOffset[8]; - short* blockOffset; - uint8 msk; - }; - - static psm_t m_psm[64]; - - static const int m_vmsize = 1024 * 1024 * 4; - - uint8* m_vm8; - uint16* m_vm16; - uint32* m_vm32; - - GSClut m_clut; - -protected: - static uint32 pageOffset32[32][32][64]; - static uint32 pageOffset32Z[32][32][64]; - static uint32 pageOffset16[32][64][64]; - static uint32 pageOffset16S[32][64][64]; - static uint32 pageOffset16Z[32][64][64]; - static uint32 pageOffset16SZ[32][64][64]; - static uint32 pageOffset8[32][64][128]; - static uint32 pageOffset4[32][128][128]; - - static int rowOffset32[4096]; - static int rowOffset32Z[4096]; - static int rowOffset16[4096]; - static int rowOffset16S[4096]; - static int rowOffset16Z[4096]; - static int rowOffset16SZ[4096]; - static int rowOffset8[2][4096]; - static int rowOffset4[2][4096]; - - static short blockOffset32[256]; - static short blockOffset32Z[256]; - static short blockOffset16[256]; - static short blockOffset16S[256]; - static short blockOffset16Z[256]; - static short blockOffset16SZ[256]; - static short blockOffset8[256]; - static short blockOffset4[256]; - - __forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA) - { - return (((!TEXA.AEM | (c & 0xffffff)) ? 
TEXA.TA0 : 0) << 24) | (c & 0xffffff); - } - - __forceinline static uint32 Expand16To32(uint16 c, const GIFRegTEXA& TEXA) - { - return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3); - } - - // TODO - - friend class GSClut; - - // - - hash_map m_omap; - hash_map m_pomap; - hash_map m_po4map; - hash_map*> m_p2tmap; - -public: - GSLocalMemory(); - virtual ~GSLocalMemory(); - - GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm); - GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); - GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); - vector* GetPage2TileMap(const GIFRegTEX0& TEX0); - - // address - - static uint32 BlockNumber32(int x, int y, uint32 bp, uint32 bw) - { - return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32[(y >> 3) & 3][(x >> 3) & 7]; - } - - static uint32 BlockNumber16(int x, int y, uint32 bp, uint32 bw) - { - return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16[(y >> 3) & 7][(x >> 4) & 3]; - } - - static uint32 BlockNumber16S(int x, int y, uint32 bp, uint32 bw) - { - return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16S[(y >> 3) & 7][(x >> 4) & 3]; - } - - static uint32 BlockNumber8(int x, int y, uint32 bp, uint32 bw) - { - // ASSERT((bw & 1) == 0); // allowed for mipmap levels - - return bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable8[(y >> 4) & 3][(x >> 4) & 7]; - } - - static uint32 BlockNumber4(int x, int y, uint32 bp, uint32 bw) - { - // ASSERT((bw & 1) == 0); // allowed for mipmap levels - - return bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable4[(y >> 4) & 7][(x >> 5) & 3]; - } - - static uint32 BlockNumber32Z(int x, int y, uint32 bp, uint32 bw) - { - return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32Z[(y >> 3) & 3][(x >> 3) & 7]; - } - - static uint32 BlockNumber16Z(int x, int y, uint32 
bp, uint32 bw) - { - return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16Z[(y >> 3) & 7][(x >> 4) & 3]; - } - - static uint32 BlockNumber16SZ(int x, int y, uint32 bp, uint32 bw) - { - return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3]; - } - - uint8* BlockPtr(uint32 bp) const - { - ASSERT(bp < 16384); - - return &m_vm8[bp << 8]; - } - - uint8* BlockPtr32(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber32(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr16(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber16(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr16S(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber16S(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr8(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber8(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr4(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber4(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr32Z(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber32Z(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr16Z(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber16Z(x, y, bp, bw) << 8]; - } - - uint8* BlockPtr16SZ(int x, int y, uint32 bp, uint32 bw) const - { - return &m_vm8[BlockNumber16SZ(x, y, bp, bw) << 8]; - } - - static uint32 PixelAddressOrg32(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber32(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7]; - } - - static uint32 PixelAddressOrg16(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber16(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; - } - - static uint32 PixelAddressOrg16S(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber16S(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; - } - - static uint32 PixelAddressOrg8(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber8(x, y, bp, bw) << 8) + columnTable8[y 
& 15][x & 15]; - } - - static uint32 PixelAddressOrg4(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber4(x, y, bp, bw) << 9) + columnTable4[y & 15][x & 31]; - } - - static uint32 PixelAddressOrg32Z(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber32Z(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7]; - } - - static uint32 PixelAddressOrg16Z(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber16Z(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; - } - - static uint32 PixelAddressOrg16SZ(int x, int y, uint32 bp, uint32 bw) - { - return (BlockNumber16SZ(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; - } - - static __forceinline uint32 PixelAddress32(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 5) * bw + (x >> 6); - uint32 word = (page << 11) + pageOffset32[bp & 0x1f][y & 0x1f][x & 0x3f]; - - return word; - } - - static __forceinline uint32 PixelAddress16(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6); - uint32 word = (page << 12) + pageOffset16[bp & 0x1f][y & 0x3f][x & 0x3f]; - - return word; - } - - static __forceinline uint32 PixelAddress16S(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6); - uint32 word = (page << 12) + pageOffset16S[bp & 0x1f][y & 0x3f][x & 0x3f]; - - return word; - } - - static __forceinline uint32 PixelAddress8(int x, int y, uint32 bp, uint32 bw) - { - // ASSERT((bw & 1) == 0); // allowed for mipmap levels - - uint32 page = (bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7); - uint32 word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f]; - - return word; - } - - static __forceinline uint32 PixelAddress4(int x, int y, uint32 bp, uint32 bw) - { - // ASSERT((bw & 1) == 0); // allowed for mipmap levels - - uint32 page = (bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7); - uint32 word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f]; - - return word; - } - - static __forceinline uint32 
PixelAddress32Z(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 5) * bw + (x >> 6); - uint32 word = (page << 11) + pageOffset32Z[bp & 0x1f][y & 0x1f][x & 0x3f]; - - return word; - } - - static __forceinline uint32 PixelAddress16Z(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6); - uint32 word = (page << 12) + pageOffset16Z[bp & 0x1f][y & 0x3f][x & 0x3f]; - - return word; - } - - static __forceinline uint32 PixelAddress16SZ(int x, int y, uint32 bp, uint32 bw) - { - uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6); - uint32 word = (page << 12) + pageOffset16SZ[bp & 0x1f][y & 0x3f][x & 0x3f]; - - return word; - } - - // pixel R/W - - __forceinline uint32 ReadPixel32(uint32 addr) const - { - return m_vm32[addr]; - } - - __forceinline uint32 ReadPixel24(uint32 addr) const - { - return m_vm32[addr] & 0x00ffffff; - } - - __forceinline uint32 ReadPixel16(uint32 addr) const - { - return (uint32)m_vm16[addr]; - } - - __forceinline uint32 ReadPixel8(uint32 addr) const - { - return (uint32)m_vm8[addr]; - } - - __forceinline uint32 ReadPixel4(uint32 addr) const - { - return (m_vm8[addr >> 1] >> ((addr & 1) << 2)) & 0x0f; - } - - __forceinline uint32 ReadPixel8H(uint32 addr) const - { - return m_vm32[addr] >> 24; - } - - __forceinline uint32 ReadPixel4HL(uint32 addr) const - { - return (m_vm32[addr] >> 24) & 0x0f; - } - - __forceinline uint32 ReadPixel4HH(uint32 addr) const - { - return (m_vm32[addr] >> 28) & 0x0f; - } - - __forceinline uint32 ReadFrame24(uint32 addr) const - { - return 0x80000000 | (m_vm32[addr] & 0xffffff); - } - - __forceinline uint32 ReadFrame16(uint32 addr) const - { - uint32 c = (uint32)m_vm16[addr]; - - return ((c & 0x8000) << 16) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3); - } - - __forceinline uint32 ReadPixel32(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel32(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel24(int x, int 
y, uint32 bp, uint32 bw) const - { - return ReadPixel24(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel16(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel16(PixelAddress16(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel16S(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel16(PixelAddress16S(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel8(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel8(PixelAddress8(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel4(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel4(PixelAddress4(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel8H(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel8H(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel4HL(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel4HL(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel4HH(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel4HH(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel32Z(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel32(PixelAddress32Z(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel24Z(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel24(PixelAddress32Z(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel16Z(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel16(PixelAddress16Z(x, y, bp, bw)); - } - - __forceinline uint32 ReadPixel16SZ(int x, int y, uint32 bp, uint32 bw) const - { - return ReadPixel16(PixelAddress16SZ(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame24(int x, int y, uint32 bp, uint32 bw) const - { - return ReadFrame24(PixelAddress32(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame16(int x, int y, uint32 bp, uint32 bw) const - { - return ReadFrame16(PixelAddress16(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame16S(int x, int y, uint32 bp, uint32 bw) const - { - 
return ReadFrame16(PixelAddress16S(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame24Z(int x, int y, uint32 bp, uint32 bw) const - { - return ReadFrame24(PixelAddress32Z(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame16Z(int x, int y, uint32 bp, uint32 bw) const - { - return ReadFrame16(PixelAddress16Z(x, y, bp, bw)); - } - - __forceinline uint32 ReadFrame16SZ(int x, int y, uint32 bp, uint32 bw) const - { - return ReadFrame16(PixelAddress16SZ(x, y, bp, bw)); - } - - __forceinline void WritePixel32(uint32 addr, uint32 c) - { - m_vm32[addr] = c; - } - - __forceinline void WritePixel24(uint32 addr, uint32 c) - { - m_vm32[addr] = (m_vm32[addr] & 0xff000000) | (c & 0x00ffffff); - } - - __forceinline void WritePixel16(uint32 addr, uint32 c) - { - m_vm16[addr] = (uint16)c; - } - - __forceinline void WritePixel8(uint32 addr, uint32 c) - { - m_vm8[addr] = (uint8)c; - } - - __forceinline void WritePixel4(uint32 addr, uint32 c) - { - int shift = (addr & 1) << 2; addr >>= 1; - - m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift)); - } - - __forceinline void WritePixel8H(uint32 addr, uint32 c) - { - m_vm32[addr] = (m_vm32[addr] & 0x00ffffff) | (c << 24); - } - - __forceinline void WritePixel4HL(uint32 addr, uint32 c) - { - m_vm32[addr] = (m_vm32[addr] & 0xf0ffffff) | ((c & 0x0f) << 24); - } - - __forceinline void WritePixel4HH(uint32 addr, uint32 c) - { - m_vm32[addr] = (m_vm32[addr] & 0x0fffffff) | ((c & 0x0f) << 28); - } - - __forceinline void WriteFrame16(uint32 addr, uint32 c) - { - uint32 rb = c & 0x00f800f8; - uint32 ga = c & 0x8000f800; - - WritePixel16(addr, (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3)); - } - - __forceinline void WritePixel32(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel32(PixelAddress32(x, y, bp, bw), c); - } - - __forceinline void WritePixel24(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel24(PixelAddress32(x, y, bp, bw), c); - } - - __forceinline void WritePixel16(int x, int 
y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel16(PixelAddress16(x, y, bp, bw), c); - } - - __forceinline void WritePixel16S(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel16(PixelAddress16S(x, y, bp, bw), c); - } - - __forceinline void WritePixel8(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel8(PixelAddress8(x, y, bp, bw), c); - } - - __forceinline void WritePixel4(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel4(PixelAddress4(x, y, bp, bw), c); - } - - __forceinline void WritePixel8H(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel8H(PixelAddress32(x, y, bp, bw), c); - } - - __forceinline void WritePixel4HL(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel4HL(PixelAddress32(x, y, bp, bw), c); - } - - __forceinline void WritePixel4HH(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel4HH(PixelAddress32(x, y, bp, bw), c); - } - - __forceinline void WritePixel32Z(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel32(PixelAddress32Z(x, y, bp, bw), c); - } - - __forceinline void WritePixel24Z(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel24(PixelAddress32Z(x, y, bp, bw), c); - } - - __forceinline void WritePixel16Z(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel16(PixelAddress16Z(x, y, bp, bw), c); - } - - __forceinline void WritePixel16SZ(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WritePixel16(PixelAddress16SZ(x, y, bp, bw), c); - } - - __forceinline void WriteFrame16(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WriteFrame16(PixelAddress16(x, y, bp, bw), c); - } - - __forceinline void WriteFrame16S(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WriteFrame16(PixelAddress16S(x, y, bp, bw), c); - } - - __forceinline void WriteFrame16Z(int x, int y, uint32 c, uint32 bp, uint32 bw) - { - WriteFrame16(PixelAddress16Z(x, y, bp, bw), c); - } - - __forceinline void WriteFrame16SZ(int x, int y, uint32 c, uint32 bp, uint32 bw) - 
{ - WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c); - } - - __forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) - { - src -= r.left * sizeof(uint32); - - for(int y = r.top; y < r.bottom; y++, src += pitch) - { - uint32* RESTRICT s = (uint32*)src; - uint32* RESTRICT d = &m_vm32[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] = s[x]; - } - } - } - - __forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) - { - src -= r.left * sizeof(uint32); - - for(int y = r.top; y < r.bottom; y++, src += pitch) - { - uint32* RESTRICT s = (uint32*)src; - uint32* RESTRICT d = &m_vm32[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff); - } - } - } - - __forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) - { - src -= r.left * sizeof(uint16); - - for(int y = r.top; y < r.bottom; y++, src += pitch) - { - uint16* RESTRICT s = (uint16*)src; - uint16* RESTRICT d = &m_vm16[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] = s[x]; - } - } - } - - __forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) - { - src -= r.left * sizeof(uint32); - - for(int y = r.top; y < r.bottom; y++, src += pitch) - { - uint32* RESTRICT s = (uint32*)src; - uint16* RESTRICT d = &m_vm16[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - uint32 rb = s[x] & 0x00f800f8; - uint32 ga = s[x] & 0x8000f800; - - d[col[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3)); - } - } - } - - __forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_vm32[addr]; - } - - __forceinline 
uint32 ReadTexel24(uint32 addr, const GIFRegTEXA& TEXA) const - { - return Expand24To32(m_vm32[addr], TEXA); - } - - __forceinline uint32 ReadTexel16(uint32 addr, const GIFRegTEXA& TEXA) const - { - return Expand16To32(m_vm16[addr], TEXA); - } - - __forceinline uint32 ReadTexel8(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_clut[ReadPixel8(addr)]; - } - - __forceinline uint32 ReadTexel4(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_clut[ReadPixel4(addr)]; - } - - __forceinline uint32 ReadTexel8H(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_clut[ReadPixel8H(addr)]; - } - - __forceinline uint32 ReadTexel4HL(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_clut[ReadPixel4HL(addr)]; - } - - __forceinline uint32 ReadTexel4HH(uint32 addr, const GIFRegTEXA& TEXA) const - { - return m_clut[ReadPixel4HH(addr)]; - } - - __forceinline uint32 ReadTexel32(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel32(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel24(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel24(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel16(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel16(PixelAddress16(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel16S(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel16(PixelAddress16S(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel8(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel8(PixelAddress8(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel4(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel4(PixelAddress4(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 
ReadTexel8H(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel8H(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel4HL(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel4HL(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel4HH(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel4HH(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel32Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel32(PixelAddress32Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel24Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel24(PixelAddress32Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel16Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel16(PixelAddress16Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - __forceinline uint32 ReadTexel16SZ(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const - { - return ReadTexel16(PixelAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW), TEXA); - } - - // - - template - void WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - - template - void WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - - template - void WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - - template - void WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - - template - void WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - - void WriteImage24(int& tx, 
int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - void WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - void WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - void WriteImage4HH(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - void WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - void WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); - - // TODO: ReadImage32/24/... - - void ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const; - - // * => 32 - - void ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - - void ReadTexture(const 
GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - - void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - - // pal ? 8 : 32 - - void ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - void ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - - void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - void ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; - - // - - template void 
ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); - - // - - void SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h); -}; - diff --git a/plugins/GSdx_legacy/GSLzma.cpp b/plugins/GSdx_legacy/GSLzma.cpp deleted file mode 100644 index 77a76dc55e..0000000000 --- a/plugins/GSdx_legacy/GSLzma.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (C) 2015-2015 Gregory hainaut - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSLzma.h" - -#ifdef __linux__ - -GSDumpFile::GSDumpFile(char* filename) { - m_fp = fopen(filename, "rb"); - if (m_fp == NULL) { - fprintf(stderr, "failed to open %s\n", filename); - throw "BAD"; // Just exit the program - } -} - -GSDumpFile::~GSDumpFile() { - if (m_fp) - fclose(m_fp); -} - -/******************************************************************/ -#ifdef LZMA_SUPPORTED - -GSDumpLzma::GSDumpLzma(char* filename) : GSDumpFile(filename) { - - memset(&m_strm, 0, sizeof(lzma_stream)); - - lzma_ret ret = lzma_stream_decoder(&m_strm, UINT32_MAX, 0); - - if (ret != LZMA_OK) { - fprintf(stderr, "Error initializing the decoder! 
(error code %u)\n", ret); - throw "BAD"; // Just exit the program - } - - m_buff_size = 1024*1024; - m_area = (uint8_t*)_aligned_malloc(m_buff_size, 32); - m_inbuf = (uint8_t*)_aligned_malloc(BUFSIZ, 32); - m_avail = 0; - m_start = 0; - - m_strm.avail_in = 0; - m_strm.next_in = m_inbuf; - - m_strm.avail_out = m_buff_size; - m_strm.next_out = m_area; -} - -void GSDumpLzma::Decompress() { - lzma_action action = LZMA_RUN; - - m_strm.next_out = m_area; - m_strm.avail_out = m_buff_size; - - // Nothing left in the input buffer. Read data from the file - if (m_strm.avail_in == 0 && !feof(m_fp)) { - m_strm.next_in = m_inbuf; - m_strm.avail_in = fread(m_inbuf, 1, BUFSIZ, m_fp); - - if (ferror(m_fp)) { - fprintf(stderr, "Read error: %s\n", strerror(errno)); - throw "BAD"; // Just exit the program - } - } - - lzma_ret ret = lzma_code(&m_strm, action); - - if (ret != LZMA_OK) { - if (ret == LZMA_STREAM_END) - fprintf(stderr, "LZMA decoder finished without error\n\n"); - else { - fprintf(stderr, "Decoder error: (error code %u)\n", ret); - throw "BAD"; // Just exit the program - } - } - - m_start = 0; - m_avail = m_buff_size - m_strm.avail_out; -} - -bool GSDumpLzma::IsEof() { - return feof(m_fp) && (m_avail == 0); -} - -void GSDumpLzma::Read(void* ptr, size_t size) { - size_t off = 0; - uint8_t* dst = (uint8_t*)ptr; - while (size) { - if (m_avail == 0) { - Decompress(); - } - - size_t l = min(size, m_avail); - memcpy(dst + off, m_area+m_start, l); - m_avail -= l; - size -= l; - m_start += l; - off += l; - } -} - -GSDumpLzma::~GSDumpLzma() { - lzma_end(&m_strm); - - if (m_inbuf) - _aligned_free(m_inbuf); - if (m_area) - _aligned_free(m_area); -} - -#endif - -/******************************************************************/ - -GSDumpRaw::GSDumpRaw(char* filename) : GSDumpFile(filename) { - m_buff_size = 0; - m_area = NULL; - m_inbuf = NULL; - m_avail = 0; - m_start = 0; -} - -GSDumpRaw::~GSDumpRaw() { -} - -bool GSDumpRaw::IsEof() { - return feof(m_fp); -} - -void 
GSDumpRaw::Read(void* ptr, size_t size) { - if (size == 1) { - // I don't know why but read of size 1 is not happy - int v = fgetc(m_fp); - memcpy(ptr, &v, 1); - } else { - size_t ret = fread(ptr, 1, size, m_fp); - if (ret != size) { - fprintf(stderr, "GSDumpRaw:: Read error\n"); - throw "BAD"; // Just exit the program - } - } -} - -#endif diff --git a/plugins/GSdx_legacy/GSLzma.h b/plugins/GSdx_legacy/GSLzma.h deleted file mode 100644 index 596d97e255..0000000000 --- a/plugins/GSdx_legacy/GSLzma.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2015-2015 Gregory hainaut - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#ifdef __linux__ - -#ifdef LZMA_SUPPORTED -#include -#endif - -class GSDumpFile { - protected: - FILE* m_fp; - - - public: - virtual bool IsEof() = 0; - virtual void Read(void* ptr, size_t size) = 0; - - GSDumpFile(char* filename); - virtual ~GSDumpFile(); -}; - -#ifdef LZMA_SUPPORTED -class GSDumpLzma : public GSDumpFile { - - lzma_stream m_strm; - - size_t m_buff_size; - uint8_t* m_area; - uint8_t* m_inbuf; - - size_t m_avail; - size_t m_start; - - void Decompress(); - - public: - - GSDumpLzma(char* filename); - virtual ~GSDumpLzma(); - - bool IsEof(); - void Read(void* ptr, size_t size); -}; -#endif - -class GSDumpRaw : public GSDumpFile { - - size_t m_buff_size; - uint8_t* m_area; - uint8_t* m_inbuf; - - size_t m_avail; - size_t m_start; - - void Decompress(); - - public: - - GSDumpRaw(char* filename); - virtual ~GSDumpRaw(); - - bool IsEof(); - void Read(void* ptr, size_t size); -}; - -#endif diff --git a/plugins/GSdx_legacy/GSPerfMon.cpp b/plugins/GSdx_legacy/GSPerfMon.cpp deleted file mode 100644 index 52a82006ef..0000000000 --- a/plugins/GSdx_legacy/GSPerfMon.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSPerfMon.h" - -GSPerfMon::GSPerfMon() - : m_frame(0) - , m_lastframe(0) - , m_count(0) -{ - memset(m_counters, 0, sizeof(m_counters)); - memset(m_stats, 0, sizeof(m_stats)); - memset(m_total, 0, sizeof(m_total)); - memset(m_begin, 0, sizeof(m_begin)); -} - -void GSPerfMon::Put(counter_t c, double val) -{ -#ifndef DISABLE_PERF_MON - if(c == Frame) - { -#ifdef __linux__ - // clock on linux will return CLOCK_PROCESS_CPUTIME_ID. - // CLOCK_THREAD_CPUTIME_ID is much more useful to measure the fps - struct timespec ts; - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); - uint64 now = (uint64) ts.tv_sec * (uint64) 1e6 + (uint64) ts.tv_nsec / (uint64) 1e3; -#else - clock_t now = clock(); -#endif - - if(m_lastframe != 0) - { - m_counters[c] += (now - m_lastframe) * 1000 / CLOCKS_PER_SEC; - } - - m_lastframe = now; - m_frame++; - m_count++; - } - else - { - m_counters[c] += val; - } -#endif -} - -void GSPerfMon::Update() -{ -#ifndef DISABLE_PERF_MON - if(m_count > 0) - { - for(size_t i = 0; i < countof(m_counters); i++) - { - m_stats[i] = m_counters[i] / m_count; - } - - m_count = 0; - } - - memset(m_counters, 0, sizeof(m_counters)); -#endif -} - -void GSPerfMon::Start(int timer) -{ -#ifndef DISABLE_PERF_MON - m_start[timer] = __rdtsc(); - - if(m_begin[timer] == 0) - { - m_begin[timer] = m_start[timer]; - } -#endif -} - -void GSPerfMon::Stop(int timer) -{ -#ifndef DISABLE_PERF_MON - if(m_start[timer] > 0) - { - m_total[timer] += __rdtsc() - m_start[timer]; - m_start[timer] = 0; - } -#endif -} - -int GSPerfMon::CPU(int timer, bool reset) -{ - int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer])); - - if(reset) - { - m_begin[timer] = 0; - m_start[timer] = 0; - m_total[timer] = 0; - } - - return percent; -} diff --git a/plugins/GSdx_legacy/GSPerfMon.h b/plugins/GSdx_legacy/GSPerfMon.h deleted file mode 100644 index 45bedf9b88..0000000000 --- a/plugins/GSdx_legacy/GSPerfMon.h +++ 
/dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -class GSPerfMon -{ -public: - enum timer_t - { - Main, - Sync, - WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, - WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, - TimerLast, - }; - - enum counter_t - { - Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, SyncPoint, - CounterLast, - }; - -protected: - double m_counters[CounterLast]; - double m_stats[CounterLast]; - uint64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast]; - uint64 m_frame; - clock_t m_lastframe; - int m_count; - - friend class GSPerfMonAutoTimer; - -public: - GSPerfMon(); - - void SetFrame(uint64 frame) {m_frame = frame;} - uint64 GetFrame() {return m_frame;} - - void Put(counter_t c, double val = 0); - double Get(counter_t c) {return m_stats[c];} - void Update(); - - void Start(int timer = Main); - void Stop(int timer = Main); - int CPU(int timer = Main, bool reset = true); -}; - -class GSPerfMonAutoTimer -{ - GSPerfMon* m_pm; - int m_timer; - -public: - GSPerfMonAutoTimer(GSPerfMon* 
pm, int timer = GSPerfMon::Main) {m_timer = timer; (m_pm = pm)->Start(m_timer);} - ~GSPerfMonAutoTimer() {m_pm->Stop(m_timer);} -}; diff --git a/plugins/GSdx_legacy/GSPng.cpp b/plugins/GSdx_legacy/GSPng.cpp deleted file mode 100644 index 62b7e7d4b5..0000000000 --- a/plugins/GSdx_legacy/GSPng.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2015-2015 Gregory hainaut - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSPng.h" -#include -#include - -struct { - int type; - int bytes_per_pixel_in; - int bytes_per_pixel_out; - int channel_bit_depth; - const char *extension[2]; -} static const pixel[GSPng::Format::COUNT] = { - {PNG_COLOR_TYPE_RGBA, 4, 4, 8 , {"_full.png", nullptr}}, // RGBA_PNG - {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", nullptr}}, // RGB_PNG - {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", "_alpha.png"}}, // RGB_A_PNG - {PNG_COLOR_TYPE_GRAY, 4, 1, 8 , {"_alpha.png", nullptr}}, // ALPHA_PNG - {PNG_COLOR_TYPE_GRAY, 1, 1, 8 , {"_R8I.png", nullptr}}, // R8I_PNG - {PNG_COLOR_TYPE_GRAY, 2, 2, 16, {"_R16I.png", nullptr}}, // R16I_PNG - {PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG -}; - -namespace GSPng { - - bool SaveFile(const string& file, Format fmt, uint8* image, uint8* row, - int width, int height, int pitch, int compression, - bool rb_swapped = false, bool first_image = false) - { - int channel_bit_depth = pixel[fmt].channel_bit_depth; - int bytes_per_pixel_in = pixel[fmt].bytes_per_pixel_in; - - int type = first_image ? pixel[fmt].type : PNG_COLOR_TYPE_GRAY; - int offset = first_image ? 0 : pixel[fmt].bytes_per_pixel_out; - int bytes_per_pixel_out = first_image ? 
pixel[fmt].bytes_per_pixel_out : bytes_per_pixel_in - offset; - - FILE *fp = fopen(file.c_str(), "wb"); - if (fp == nullptr) - return false; - - png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - png_infop info_ptr = nullptr; - - bool success = false; - try { - if (png_ptr == nullptr) - throw GSDXRecoverableError(); - - info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == nullptr) - throw GSDXRecoverableError(); - - if (setjmp(png_jmpbuf(png_ptr))) - throw GSDXRecoverableError(); - - if (compression < 0 || compression > Z_BEST_COMPRESSION) - compression = Z_BEST_SPEED; - - png_init_io(png_ptr, fp); - png_set_compression_level(png_ptr, compression); - png_set_IHDR(png_ptr, info_ptr, width, height, channel_bit_depth, type, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - png_write_info(png_ptr, info_ptr); - - if (channel_bit_depth > 8) - png_set_swap(png_ptr); - if (rb_swapped && type != PNG_COLOR_TYPE_GRAY) - png_set_bgr(png_ptr); - - for (int y = 0; y < height; ++y, image += pitch) { - for (int x = 0; x < width; ++x) - for (int i = 0; i < bytes_per_pixel_out; ++i) - row[bytes_per_pixel_out * x + i] = image[bytes_per_pixel_in * x + i + offset]; - png_write_row(png_ptr, row); - } - png_write_end(png_ptr, nullptr); - - success = true; - } catch (GSDXRecoverableError&) { - fprintf(stderr, "Failed to write image %s\n", file.c_str()); - } - - if (png_ptr) - png_destroy_write_struct(&png_ptr, info_ptr ? 
&info_ptr : nullptr); - fclose(fp); - - return success; - } - - bool Save(GSPng::Format fmt, const string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped) - { - std::string root = file; - root.replace(file.length() - 4, 4, ""); - - ASSERT(fmt >= Format::START && fmt < Format::COUNT); - - std::unique_ptr row(new uint8[pixel[fmt].bytes_per_pixel_out * w]); - - std::string filename = root + pixel[fmt].extension[0]; - if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true)) - return false; - - // Second image - if (pixel[fmt].extension[1] == nullptr) - return true; - - filename = root + pixel[fmt].extension[1]; - return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression); - } - - Transaction::Transaction(GSPng::Format fmt, const string& file, const uint8* image, int w, int h, int pitch, int compression) - : m_fmt(fmt), m_file(file), m_w(w), m_h(h), m_pitch(pitch), m_compression(compression) - { - // Note: yes it would be better to use shared pointer - m_image = (uint8*)_aligned_malloc(pitch*h, 32); - if (m_image) - memcpy(m_image, image, pitch*h); - } - - Transaction::~Transaction() - { - if (m_image) - _aligned_free(m_image); - } - - void Worker::Process(shared_ptr& item) - { - Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression); - } - -} diff --git a/plugins/GSdx_legacy/GSPng.h b/plugins/GSdx_legacy/GSPng.h deleted file mode 100644 index 6dd623fee3..0000000000 --- a/plugins/GSdx_legacy/GSPng.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2015-2015 Gregory hainaut - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSThread_CXX11.h" - -namespace GSPng { - enum Format { - START = 0, - RGBA_PNG = 0, - RGB_PNG, - RGB_A_PNG, - ALPHA_PNG, - R8I_PNG, - R16I_PNG, - R32I_PNG, - COUNT - }; - - class Transaction - { - public: - Format m_fmt; - const std::string m_file; - uint8* m_image; - int m_w; - int m_h; - int m_pitch; - int m_compression; - - Transaction(GSPng::Format fmt, const string& file, const uint8* image, int w, int h, int pitch, int compression); - ~Transaction(); - }; - - bool Save(GSPng::Format fmt, const string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped = false); - - class Worker : public GSJobQueue, 16 > - { - public: - Worker() {}; - virtual ~Worker() {}; - - void Process(shared_ptr& item); - - int GetPixels(bool reset) {return 0;} - }; -} diff --git a/plugins/GSdx_legacy/GSRasterizer.cpp b/plugins/GSdx_legacy/GSRasterizer.cpp deleted file mode 100644 index 2075aac486..0000000000 --- a/plugins/GSdx_legacy/GSRasterizer.cpp +++ /dev/null @@ -1,1234 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -// TODO: JIT Draw* (flags: depth, texture, color (+iip), scissor) - -#include "stdafx.h" -#include "GSRasterizer.h" - -// - for more threads screen segments should be smaller to better distribute the pixels -// - but not too small to keep the threading overhead low -// - ideal value between 3 and 5, or log2(64 / number of threads) - -#define THREAD_HEIGHT 4 - -int GSRasterizerData::s_counter = 0; - -GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon) - : m_perfmon(perfmon) - , m_ds(ds) - , m_id(id) - , m_threads(threads) -{ - memset(&m_pixels, 0, sizeof(m_pixels)); - - m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false); - m_edge.count = 0; - - m_scanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64); - - int row = 0; - - while(row < (2048 >> THREAD_HEIGHT)) - { - for(int i = 0; i < threads; i++, row++) - { - m_scanline[row] = i == id ? 
1 : 0; - } - } -} - -GSRasterizer::~GSRasterizer() -{ - _aligned_free(m_scanline); - - if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048); - - delete m_ds; -} - -bool GSRasterizer::IsOneOfMyScanlines(int top) const -{ - ASSERT(top >= 0 && top < 2048); - - return m_scanline[top >> THREAD_HEIGHT] != 0; -} - -bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const -{ - ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048); - - top = top >> THREAD_HEIGHT; - bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT; - - while(top < bottom) - { - if(m_scanline[top++]) - { - return true; - } - } - - return false; -} - -int GSRasterizer::FindMyNextScanline(int top) const -{ - int i = top >> THREAD_HEIGHT; - - if(m_scanline[i] == 0) - { - while(m_scanline[++i] == 0); - - top = i << THREAD_HEIGHT; - } - - return top; -} - -void GSRasterizer::Queue(const shared_ptr& data) -{ - Draw(data.get()); -} - -int GSRasterizer::GetPixels(bool reset) -{ - int pixels = m_pixels.sum; - - if(reset) - { - m_pixels.sum = 0; - } - - return pixels; -} - -void GSRasterizer::Draw(GSRasterizerData* data) -{ - GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id); - - if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return; - - m_pixels.actual = 0; - m_pixels.total = 0; - - data->start = __rdtsc(); - - m_ds->BeginDraw(data); - - const GSVertexSW* vertex = data->vertex; - const GSVertexSW* vertex_end = data->vertex + data->vertex_count; - - const uint32* index = data->index; - const uint32* index_end = data->index + data->index_count; - - uint32 tmp_index[] = {0, 1, 2}; - - bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor)); - - m_scissor = data->scissor; - m_fscissor_x = GSVector4(data->scissor).xzxz(); - m_fscissor_y = GSVector4(data->scissor).ywyw(); - - switch(data->primclass) - { - case GS_POINT_CLASS: - - if(scissor_test) - { - DrawPoint(vertex, data->vertex_count, 
index, data->index_count); - } - else - { - DrawPoint(vertex, data->vertex_count, index, data->index_count); - } - - break; - - case GS_LINE_CLASS: - - if(index != NULL) - { - do {DrawLine(vertex, index); index += 2;} - while(index < index_end); - } - else - { - do {DrawLine(vertex, tmp_index); vertex += 2;} - while(vertex < vertex_end); - } - - break; - - case GS_TRIANGLE_CLASS: - - if(index != NULL) - { - do {DrawTriangle(vertex, index); index += 3;} - while(index < index_end); - } - else - { - do {DrawTriangle(vertex, tmp_index); vertex += 3;} - while(vertex < vertex_end); - } - - break; - - case GS_SPRITE_CLASS: - - if(index != NULL) - { - do {DrawSprite(vertex, index); index += 2;} - while(index < index_end); - } - else - { - do {DrawSprite(vertex, tmp_index); vertex += 2;} - while(vertex < vertex_end); - } - - break; - - default: - __assume(0); - } - - #if _M_SSE >= 0x501 - _mm256_zeroupper(); - #endif - - data->pixels = m_pixels.actual; - - uint64 ticks = __rdtsc() - data->start; - - m_pixels.sum += m_pixels.actual; - - m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total); -} - -template -void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count) -{ - if(index != NULL) - { - for(int i = 0; i < index_count; i++, index++) - { - const GSVertexSW& v = vertex[*index]; - - GSVector4i p(v.p); - - if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) - { - if(IsOneOfMyScanlines(p.y)) - { - m_ds->SetupPrim(vertex, index, GSVertexSW::zero()); - - DrawScanline(1, p.x, p.y, v); - } - } - } - } - else - { - uint32 tmp_index[1] = {0}; - - for(int i = 0; i < vertex_count; i++, vertex++) - { - const GSVertexSW& v = vertex[0]; - - GSVector4i p(v.p); - - if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) - { - if(IsOneOfMyScanlines(p.y)) - { - m_ds->SetupPrim(vertex, tmp_index, 
GSVertexSW::zero()); - - DrawScanline(1, p.x, p.y, v); - } - } - } - } -} - -void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) -{ - const GSVertexSW& v0 = vertex[index[0]]; - const GSVertexSW& v1 = vertex[index[1]]; - - GSVertexSW dv = v1 - v0; - - GSVector4 dp = dv.p.abs(); - - int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy| - - if(m_ds->HasEdge()) - { - DrawEdge(v0, v1, dv, i, 0); - DrawEdge(v0, v1, dv, i, 1); - - Flush(vertex, index, GSVertexSW::zero(), true); - - return; - } - - GSVector4i dpi(dp); - - if(dpi.y == 0) - { - if(dpi.x > 0) - { - // shortcut for horizontal lines - - GSVector4 mask = (v0.p > v1.p).xxxx(); - - GSVertexSW scan; - - scan.p = v0.p.blend32(v1.p, mask); - scan.t = v0.t.blend32(v1.t, mask); - scan.c = v0.c.blend32(v1.c, mask); - - GSVector4i p(scan.p); - - if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y)) - { - GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil(); - GSVector4 l = lrf.max(m_fscissor_x); - GSVector4 r = lrf.min(m_fscissor_x); - GSVector4i lr = GSVector4i(l.xxyy(r)); - - int left = lr.extract32<0>(); - int right = lr.extract32<2>(); - - int pixels = right - left; - - if(pixels > 0) - { - GSVertexSW dscan = dv / dv.p.xxxx(); - - scan += dscan * (l - scan.p).xxxx(); - - m_ds->SetupPrim(vertex, index, dscan); - - DrawScanline(pixels, left, p.y, scan); - } - } - } - - return; - } - - int steps = dpi.v[i]; - - if(steps > 0) - { - GSVertexSW edge = v0; - GSVertexSW dedge = dv / GSVector4(dp.v[i]); - - GSVertexSW* RESTRICT e = m_edge.buff; - - while(1) - { - GSVector4i p(edge.p); - - if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) - { - if(IsOneOfMyScanlines(p.y)) - { - AddScanline(e, 1, p.x, p.y, edge); - - e++; - } - } - - if(--steps == 0) break; - - edge += dedge; - } - - m_edge.count = e - m_edge.buff; - - Flush(vertex, index, GSVertexSW::zero()); - } -} - -static const uint8 s_ysort[8][4] = -{ - {0, 1, 2, 0}, // 
y0 <= y1 <= y2 - {1, 0, 2, 0}, // y1 < y0 <= y2 - {0, 0, 0, 0}, - {1, 2, 0, 0}, // y1 <= y2 < y0 - {0, 2, 1, 0}, // y0 <= y2 < y1 - {0, 0, 0, 0}, - {2, 0, 1, 0}, // y2 < y0 <= y1 - {2, 1, 0, 0}, // y2 < y1 < y0 -}; - -#if _M_SSE >= 0x501 - -void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) -{ - GSVertexSW2 dv[3]; - GSVertexSW2 edge; - GSVertexSW2 dedge; - GSVertexSW2 dscan; - - GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p); - GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx(); - - int m1 = (y0011 > y1221).mask() & 7; - - int i[3]; - - i[0] = index[s_ysort[m1][0]]; - i[1] = index[s_ysort[m1][1]]; - i[2] = index[s_ysort[m1][2]]; - - const GSVertexSW2* _v = (const GSVertexSW2*)vertex; - - const GSVertexSW2& v0 = _v[i[0]]; - const GSVertexSW2& v1 = _v[i[1]]; - const GSVertexSW2& v2 = _v[i[2]]; - - y0011 = v0.p.yyyy(v1.p); - y1221 = v1.p.yyyy(v2.p).xzzx(); - - m1 = (y0011 == y1221).mask() & 7; - - // if(i == 0) => y0 < y1 < y2 - // if(i == 1) => y0 == y1 < y2 - // if(i == 4) => y0 < y1 == y2 - - if(m1 == 7) return; // y0 == y1 == y2 - - GSVector4 tbf = y0011.xzxz(y1221).ceil(); - GSVector4 tbmax = tbf.max(m_fscissor_y); - GSVector4 tbmin = tbf.min(m_fscissor_y); - GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b) - - dv[0] = v1 - v0; - dv[1] = v2 - v0; - dv[2] = v2 - v1; - - GSVector4 cross = dv[0].p * dv[1].p.yxwz(); - - cross = (cross - cross.yxwz()).yyyy(); // select the second component, the negated cross product - - // the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value - - int m2 = cross.upl(cross == GSVector4::zero()).mask(); - - if(m2 & 2) return; - - m2 &= 1; - - cross = cross.rcpnr(); - - GSVector4 dxy01 = dv[0].p.xyxy(dv[1].p); - - GSVector4 dx = dxy01.xzxy(dv[2].p); - GSVector4 dy = dxy01.ywyx(dv[2].p); - - GSVector4 ddx[3]; - - ddx[0] = dx / dy; - ddx[1] = ddx[0].yxzw(); - ddx[2] = ddx[0].xzyw(); - - GSVector8 
_dxy01c(dxy01 * cross); - - /* - dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww(); - dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx(); - */ - - dscan.p = dv[1].p * _dxy01c.yyyy().extract<0>() - dv[0].p * _dxy01c.wwww().extract<0>(); - dscan.tc = dv[1].tc * _dxy01c.yyyy() - dv[0].tc * _dxy01c.wwww(); - - dedge.p = dv[0].p * _dxy01c.zzzz().extract<0>() - dv[1].p * _dxy01c.xxxx().extract<0>(); - dedge.tc = dv[0].tc * _dxy01c.zzzz() - dv[1].tc * _dxy01c.xxxx(); - - if(m1 & 1) - { - if(tb.y < tb.w) - { - edge = _v[i[1 - m2]]; - - edge.p = edge.p.insert32<0, 1>(vertex[i[m2]].p); - dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - - DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p); - } - } - else - { - if(tb.x < tb.z) - { - edge = v0; - - edge.p = edge.p.xxzw(); - dedge.p = ddx[m2].xyzw(dedge.p); - - DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p); - } - - if(tb.y < tb.w) - { - edge = v1; - - edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p); - dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - - DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p); - } - } - - Flush(vertex, index, (GSVertexSW&)dscan); - - if(m_ds->HasEdge()) - { - GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| - GSVector4 b = dx < GSVector4::zero(); // dx < 0 - GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 - - int orientation = a.mask(); - int side = ((a | b) ^ c).mask() ^ 2; // evil - - DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv[0], orientation & 1, side & 1); - DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv[1], orientation & 2, side & 2); - DrawEdge((GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv[2], orientation & 4, side & 4); - - Flush(vertex, index, GSVertexSW::zero(), true); - } -} - -void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0) -{ - ASSERT(top < bottom); - ASSERT(edge.p.x <= edge.p.y); - - 
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; - - GSVector4 scissor = m_fscissor_x; - - top = FindMyNextScanline(top); - - while(top < bottom) - { - GSVector8 dy(GSVector4(top) - p0.yyyy()); - - GSVertexSW2 scan; - - scan.p = edge.p + dedge.p * dy.extract<0>(); - - GSVector4 lrf = scan.p.ceil(); - GSVector4 l = lrf.max(scissor); - GSVector4 r = lrf.min(scissor); - GSVector4i lr = GSVector4i(l.xxyy(r)); - - int left = lr.extract32<0>(); - int right = lr.extract32<2>(); - - int pixels = right - left; - - if(pixels > 0) - { - scan.tc = edge.tc + dedge.tc * dy; - - GSVector8 prestep((l - p0).xxxx()); - - scan.p = scan.p + dscan.p * prestep.extract<0>(); - scan.tc = scan.tc + dscan.tc * prestep; - - AddScanline(e++, pixels, left, top, (GSVertexSW&)scan); - } - - top++; - - if(!IsOneOfMyScanlines(top)) - { - top += (m_threads - 1) << THREAD_HEIGHT; - } - } - - m_edge.count += e - &m_edge.buff[m_edge.count]; -} - -#else - -void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) -{ - GSVertexSW dv[3]; - GSVertexSW edge; - GSVertexSW dedge; - GSVertexSW dscan; - - GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p); - GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx(); - - int m1 = (y0011 > y1221).mask() & 7; - - int i[3]; - - i[0] = index[s_ysort[m1][0]]; - i[1] = index[s_ysort[m1][1]]; - i[2] = index[s_ysort[m1][2]]; - - const GSVertexSW& v0 = vertex[i[0]]; - const GSVertexSW& v1 = vertex[i[1]]; - const GSVertexSW& v2 = vertex[i[2]]; - - y0011 = v0.p.yyyy(v1.p); - y1221 = v1.p.yyyy(v2.p).xzzx(); - - m1 = (y0011 == y1221).mask() & 7; - - // if(i == 0) => y0 < y1 < y2 - // if(i == 1) => y0 == y1 < y2 - // if(i == 4) => y0 < y1 == y2 - - if(m1 == 7) return; // y0 == y1 == y2 - - GSVector4 tbf = y0011.xzxz(y1221).ceil(); - GSVector4 tbmax = tbf.max(m_fscissor_y); - GSVector4 tbmin = tbf.min(m_fscissor_y); - GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b) - - dv[0] = v1 - 
v0; - dv[1] = v2 - v0; - dv[2] = v2 - v1; - - GSVector4 cross = dv[0].p * dv[1].p.yxwz(); - - cross = (cross - cross.yxwz()).yyyy(); // select the second component, the negated cross product - - // the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value - - int m2 = cross.upl(cross == GSVector4::zero()).mask(); - - if(m2 & 2) return; - - m2 &= 1; - - cross = cross.rcpnr(); - - GSVector4 dxy01 = dv[0].p.xyxy(dv[1].p); - - GSVector4 dx = dxy01.xzxy(dv[2].p); - GSVector4 dy = dxy01.ywyx(dv[2].p); - - GSVector4 ddx[3]; - - ddx[0] = dx / dy; - ddx[1] = ddx[0].yxzw(); - ddx[2] = ddx[0].xzyw(); - - GSVector4 dxy01c = dxy01 * cross; - - /* - dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww(); - dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx(); - */ - - dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww(); - dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww(); - dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww(); - - dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx(); - dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx(); - dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx(); - - if(m1 & 1) - { - if(tb.y < tb.w) - { - edge = vertex[i[1 - m2]]; - - edge.p = edge.p.insert32<0, 1>(vertex[i[m2]].p); - dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - - DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p); - } - } - else - { - if(tb.x < tb.z) - { - edge = v0; - - edge.p = edge.p.xxzw(); - dedge.p = ddx[m2].xyzw(dedge.p); - - DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p); - } - - if(tb.y < tb.w) - { - edge = v1; - - edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p); - dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - - DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p); - } - } - - Flush(vertex, index, dscan); - - if(m_ds->HasEdge()) - { - GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| - GSVector4 b = dx < GSVector4::zero(); // dx < 0 - 
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 - - int orientation = a.mask(); - int side = ((a | b) ^ c).mask() ^ 2; // evil - - DrawEdge(v0, v1, dv[0], orientation & 1, side & 1); - DrawEdge(v0, v2, dv[1], orientation & 2, side & 2); - DrawEdge(v1, v2, dv[2], orientation & 4, side & 4); - - Flush(vertex, index, GSVertexSW::zero(), true); - } -} - -void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0) -{ - ASSERT(top < bottom); - ASSERT(edge.p.x <= edge.p.y); - - GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; - - GSVector4 scissor = m_fscissor_x; - - top = FindMyNextScanline(top); - - while(top < bottom) - { - GSVector4 dy = GSVector4(top) - p0.yyyy(); - - GSVertexSW scan; - - scan.p = edge.p + dedge.p * dy; - - GSVector4 lrf = scan.p.ceil(); - GSVector4 l = lrf.max(scissor); - GSVector4 r = lrf.min(scissor); - GSVector4i lr = GSVector4i(l.xxyy(r)); - - int left = lr.extract32<0>(); - int right = lr.extract32<2>(); - - int pixels = right - left; - - if(pixels > 0) - { - scan.t = edge.t + dedge.t * dy; - scan.c = edge.c + dedge.c * dy; - - GSVector4 prestep = (l - p0).xxxx(); - - scan.p = scan.p + dscan.p * prestep; - scan.t = scan.t + dscan.t * prestep; - scan.c = scan.c + dscan.c * prestep; - - AddScanline(e++, pixels, left, top, scan); - } - - top++; - - if(!IsOneOfMyScanlines(top)) - { - top += (m_threads - 1) << THREAD_HEIGHT; - } - } - - m_edge.count += e - &m_edge.buff[m_edge.count]; -} - -#endif - -void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index) -{ - const GSVertexSW& v0 = vertex[index[0]]; - const GSVertexSW& v1 = vertex[index[1]]; - - GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero()); - - GSVertexSW v[2]; - - v[0].p = v1.p.blend32(v0.p, mask); - v[0].t = v1.t.blend32(v0.t, mask); - v[0].c = v1.c; - - v[1].p = v0.p.blend32(v1.p, mask); - v[1].t = v0.t.blend32(v1.t, mask); - - GSVector4i 
r(v[0].p.xyxy(v[1].p).ceil()); - - r = r.rintersect(m_scissor); - - if(r.rempty()) return; - - GSVertexSW scan = v[0]; - - if(m_ds->IsSolidRect()) - { - if(m_threads == 1) - { - m_ds->DrawRect(r, scan); - - int pixels = r.width() * r.height(); - - m_pixels.actual += pixels; - m_pixels.total += pixels; - } - else - { - int top = FindMyNextScanline(r.top); - int bottom = r.bottom; - - while(top < bottom) - { - r.top = top; - r.bottom = std::min((top + (1 << THREAD_HEIGHT)) & ~((1 << THREAD_HEIGHT) - 1), bottom); - - m_ds->DrawRect(r, scan); - - int pixels = r.width() * r.height(); - - m_pixels.actual += pixels; - m_pixels.total += pixels; - - top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT); - } - } - - return; - } - - GSVertexSW dv = v[1] - v[0]; - - GSVector4 dt = dv.t / dv.p.xyxy(); - - GSVertexSW dedge; - GSVertexSW dscan; - - dedge.t = GSVector4::zero().insert32<1, 1>(dt); - dscan.t = GSVector4::zero().insert32<0, 0>(dt); - - GSVector4 prestep = GSVector4(r.left, r.top) - scan.p; - - int m = (prestep == GSVector4::zero()).mask(); - - if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); - if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); - - m_ds->SetupPrim(vertex, index, dscan); - - while(1) - { - if(IsOneOfMyScanlines(r.top)) - { - DrawScanline(r.width(), r.left, r.top, scan); - } - - if(++r.top >= r.bottom) break; - - scan.t += dedge.t; - } -} - -void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side) -{ - // orientation: - // - true: |dv.p.y| > |dv.p.x| - // - false |dv.p.x| > |dv.p.y| - // side: - // - true: top/left edge - // - false: bottom/right edge - - // TODO: bit slow and too much duplicated code - // TODO: inner pre-step is still missing (hardly noticable) - // TODO: it does not always line up with the edge of the surrounded triangle - - GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; - - if(orientation) - { - GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b - GSVector4 
tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb) - GSVector4 tbmin = tbf.min(m_fscissor_y); // min(t, st) min(t, sb) min(b, st) min(b, sb) - GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(t, st) max(b, sb) min(t, st) min(b, sb) - - int top, bottom; - - GSVertexSW edge, dedge; - - if((dv.p >= GSVector4::zero()).mask() & 2) - { - top = tb.extract32<0>(); // max(t, st) - bottom = tb.extract32<3>(); // min(b, sb) - - if(top >= bottom) return; - - edge = v0; - dedge = dv / dv.p.yyyy(); - - edge += dedge * (tbmax.xxxx() - edge.p.yyyy()); - } - else - { - top = tb.extract32<1>(); // max(b, st) - bottom = tb.extract32<2>(); // min(t, sb) - - if(top >= bottom) return; - - edge = v1; - dedge = dv / dv.p.yyyy(); - - edge += dedge * (tbmax.zzzz() - edge.p.yyyy()); - } - - GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); - - int x = p.extract32<0>(); - int dx = p.extract32<1>(); - - if(side) - { - while(1) - { - int xi = x >> 16; - int xf = x & 0xffff; - - if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) - { - AddScanline(e, 1, xi, top, edge); - - e->t.u32[3] = (0x10000 - xf) & 0xffff; - - e++; - } - - if(++top >= bottom) break; - - edge += dedge; - x += dx; - } - } - else - { - while(1) - { - int xi = (x >> 16) + 1; - int xf = x & 0xffff; - - if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) - { - AddScanline(e, 1, xi, top, edge); - - e->t.u32[3] = xf; - - e++; - } - - if(++top >= bottom) break; - - edge += dedge; - x += dx; - } - } - } - else - { - GSVector4 lrf = v0.p.xxxx(v1.p).ceil(); // l l r r - GSVector4 lrmax = lrf.max(m_fscissor_x); // max(l, sl) max(l, sr) max(r, sl) max(r, sr) - GSVector4 lrmin = lrf.min(m_fscissor_x); // min(l, sl) min(l, sr) min(r, sl) min(r, sr) - GSVector4i lr = GSVector4i(lrmax.xzyw(lrmin)); // max(l, sl) max(r, sl) min(l, sr) min(r, sr) - - int left, right; - - GSVertexSW edge, dedge; - - if((dv.p >= GSVector4::zero()).mask() & 1) - { - left = 
lr.extract32<0>(); // max(l, sl) - right = lr.extract32<3>(); // min(r, sr) - - if(left >= right) return; - - edge = v0; - dedge = dv / dv.p.xxxx(); - - edge += dedge * (lrmax.xxxx() - edge.p.xxxx()); - } - else - { - left = lr.extract32<1>(); // max(r, sl) - right = lr.extract32<2>(); // min(l, sr) - - if(left >= right) return; - - edge = v1; - dedge = dv / dv.p.xxxx(); - - edge += dedge * (lrmax.zzzz() - edge.p.xxxx()); - } - - GSVector4i p = GSVector4i(edge.p.upl(dedge.p) * 0x10000); - - int y = p.extract32<2>(); - int dy = p.extract32<3>(); - - if(side) - { - while(1) - { - int yi = y >> 16; - int yf = y & 0xffff; - - if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) - { - AddScanline(e, 1, left, yi, edge); - - e->t.u32[3] = (0x10000 - yf) & 0xffff; - - e++; - } - - if(++left >= right) break; - - edge += dedge; - y += dy; - } - } - else - { - while(1) - { - int yi = (y >> 16) + 1; - int yf = y & 0xffff; - - if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) - { - AddScanline(e, 1, left, yi, edge); - - e->t.u32[3] = yf; - - e++; - } - - if(++left >= right) break; - - edge += dedge; - y += dy; - } - } - } - - m_edge.count += e - &m_edge.buff[m_edge.count]; -} - -void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan) -{ - *e = scan; - - e->_pad.i32[0] = pixels; - e->_pad.i32[1] = left; - e->_pad.i32[2] = top; -} - -void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge) -{ - // TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline) - - int count = m_edge.count; - - if(count > 0) - { - m_ds->SetupPrim(vertex, index, dscan); - - const GSVertexSW* RESTRICT e = m_edge.buff; - const GSVertexSW* RESTRICT ee = e + count; - - if(!edge) - { - do - { - int pixels = e->_pad.i32[0]; - int left = e->_pad.i32[1]; - int top = e->_pad.i32[2]; - - DrawScanline(pixels, left, top, *e++); - } - 
while(e < ee); - } - else - { - do - { - int pixels = e->_pad.i32[0]; - int left = e->_pad.i32[1]; - int top = e->_pad.i32[2]; - - DrawEdge(pixels, left, top, *e++); - } - while(e < ee); - } - - m_edge.count = 0; - } -} - -#if _M_SSE >= 0x501 -#define PIXELS_PER_LOOP 8 -#else -#define PIXELS_PER_LOOP 4 -#endif - -void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) -{ - m_pixels.actual += pixels; - m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - (left & (PIXELS_PER_LOOP - 1)); - //m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - left; - - ASSERT(m_pixels.actual <= m_pixels.total); - - m_ds->DrawScanline(pixels, left, top, scan); -} - -void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) -{ - m_pixels.actual += 1; - m_pixels.total += PIXELS_PER_LOOP - 1; - - ASSERT(m_pixels.actual <= m_pixels.total); - - m_ds->DrawEdge(pixels, left, top, scan); -} - -// - -GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon) - : m_perfmon(perfmon) -{ - m_scanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64); - - int row = 0; - - while(row < (2048 >> THREAD_HEIGHT)) - { - for(int i = 0; i < threads; i++, row++) - { - m_scanline[row] = (uint8)i; - } - } -} - -GSRasterizerList::~GSRasterizerList() -{ - for(auto i = m_workers.begin(); i != m_workers.end(); i++) - { - delete *i; - } - - _aligned_free(m_scanline); -} - -void GSRasterizerList::Queue(const shared_ptr& data) -{ - GSVector4i r = data->bbox.rintersect(data->scissor); - - ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048); - - int top = r.top >> THREAD_HEIGHT; - int bottom = std::min((r.bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT, top + m_workers.size()); - - while(top < bottom) - { - m_workers[m_scanline[top++]]->Push(data); - } -} - -void GSRasterizerList::Sync() -{ - if(!IsSynced()) - { - for(size_t i = 0; i < m_workers.size(); 
i++) - { - m_workers[i]->Wait(); - } - - m_perfmon->Put(GSPerfMon::SyncPoint, 1); - } -} - -bool GSRasterizerList::IsSynced() const -{ - for(size_t i = 0; i < m_workers.size(); i++) - { - if(!m_workers[i]->IsEmpty()) - { - return false; - } - } - - return true; -} - -int GSRasterizerList::GetPixels(bool reset) -{ - int pixels = 0; - - for(size_t i = 0; i < m_workers.size(); i++) - { - pixels += m_workers[i]->GetPixels(reset); - } - - return pixels; -} - -// GSRasterizerList::GSWorker - -GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r) - : GSJobQueue, 256>() - , m_r(r) -{ -} - -GSRasterizerList::GSWorker::~GSWorker() -{ - Wait(); - - delete m_r; -} - -int GSRasterizerList::GSWorker::GetPixels(bool reset) -{ - return m_r->GetPixels(reset); -} - -void GSRasterizerList::GSWorker::Process(shared_ptr& item) -{ - m_r->Draw(item.get()); -} diff --git a/plugins/GSdx_legacy/GSRasterizer.h b/plugins/GSdx_legacy/GSRasterizer.h deleted file mode 100644 index 35d42c8ff4..0000000000 --- a/plugins/GSdx_legacy/GSRasterizer.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSVertexSW.h" -#include "GSFunctionMap.h" -#include "GSAlignedClass.h" -#include "GSPerfMon.h" -#include "GSThread_CXX11.h" - -__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32> -{ - static int s_counter; - -public: - GSVector4i scissor; - GSVector4i bbox; - GS_PRIM_CLASS primclass; - uint8* buff; - GSVertexSW* vertex; - int vertex_count; - uint32* index; - int index_count; - uint64 frame; - uint64 start; - int pixels; - int counter; - - GSRasterizerData() - : scissor(GSVector4i::zero()) - , bbox(GSVector4i::zero()) - , primclass(GS_INVALID_CLASS) - , buff(NULL) - , vertex(NULL) - , vertex_count(0) - , index(NULL) - , index_count(0) - , frame(0) - , start(0) - , pixels(0) - { - counter = s_counter++; - } - - virtual ~GSRasterizerData() - { - if(buff != NULL) _aligned_free(buff); - } -}; - -class IDrawScanline : public GSAlignedClass<32> -{ -public: - typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan); - typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); - typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit - -protected: - SetupPrimPtr m_sp; - DrawScanlinePtr m_ds; - DrawScanlinePtr m_de; - DrawRectPtr m_dr; - -public: - IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {} - virtual ~IDrawScanline() {} - - virtual void BeginDraw(const GSRasterizerData* data) = 0; - virtual void EndDraw(uint64 frame, uint64 ticks, int actual, int total) = 0; - -#ifdef ENABLE_JIT_RASTERIZER - - __forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);} - __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} - __forceinline void DrawEdge(int pixels, int left, int top, const 
GSVertexSW& scan) {m_de(pixels, left, top, scan);} - __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} - -#else - - virtual void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) = 0; - virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0; - virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0; - virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0; - -#endif - - virtual void PrintStats() = 0; - - __forceinline bool HasEdge() const {return m_de != NULL;} - __forceinline bool IsSolidRect() const {return m_dr != NULL;} -}; - -class IRasterizer : public GSAlignedClass<32> -{ -public: - virtual ~IRasterizer() {} - - virtual void Queue(const shared_ptr& data) = 0; - virtual void Sync() = 0; - virtual bool IsSynced() const = 0; - virtual int GetPixels(bool reset = true) = 0; - virtual void PrintStats() = 0; -}; - -__aligned(class, 32) GSRasterizer : public IRasterizer -{ -protected: - GSPerfMon* m_perfmon; - IDrawScanline* m_ds; - int m_id; - int m_threads; - uint8* m_scanline; - GSVector4i m_scissor; - GSVector4 m_fscissor_x; - GSVector4 m_fscissor_y; - struct {GSVertexSW* buff; int count;} m_edge; - struct {int sum, actual, total;} m_pixels; - - typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count); - - template - void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count); - void DrawLine(const GSVertexSW* vertex, const uint32* index); - void DrawTriangle(const GSVertexSW* vertex, const uint32* index); - void DrawSprite(const GSVertexSW* vertex, const uint32* index); - - #if _M_SSE >= 0x501 - __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0); - #else - __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const 
GSVertexSW& dscan, const GSVector4& p0); - #endif - - void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); - - __forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan); - __forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false); - - __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); - __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); - -public: - GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); - virtual ~GSRasterizer(); - - __forceinline bool IsOneOfMyScanlines(int top) const; - __forceinline bool IsOneOfMyScanlines(int top, int bottom) const; - __forceinline int FindMyNextScanline(int top) const; - - void Draw(GSRasterizerData* data); - - // IRasterizer - - void Queue(const shared_ptr& data); - void Sync() {} - bool IsSynced() const {return true;} - int GetPixels(bool reset); - void PrintStats() {m_ds->PrintStats();} -}; - -class GSRasterizerList : public IRasterizer -{ -protected: - class GSWorker : public GSJobQueue, 256 > - { - GSRasterizer* m_r; - - public: - GSWorker(GSRasterizer* r); - virtual ~GSWorker(); - - int GetPixels(bool reset); - - // GSJobQueue - - void Process(shared_ptr& item); - }; - - GSPerfMon* m_perfmon; - vector m_workers; - uint8* m_scanline; - - GSRasterizerList(int threads, GSPerfMon* perfmon); - -public: - virtual ~GSRasterizerList(); - - template static IRasterizer* Create(int threads, GSPerfMon* perfmon) - { - threads = std::max(threads, 0); - - if(threads == 0) - { - return new GSRasterizer(new DS(), 0, 1, perfmon); - } - else - { - GSRasterizerList* rl = new GSRasterizerList(threads, perfmon); - - for(int i = 0; i < threads; i++) - { - rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon))); - } - - return rl; - } - } - - // IRasterizer - - void 
Queue(const shared_ptr& data); - void Sync(); - bool IsSynced() const; - int GetPixels(bool reset); - void PrintStats() {} -}; diff --git a/plugins/GSdx_legacy/GSRenderer.cpp b/plugins/GSdx_legacy/GSRenderer.cpp deleted file mode 100644 index 2f34956b75..0000000000 --- a/plugins/GSdx_legacy/GSRenderer.cpp +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRenderer.h" -#ifdef __linux__ -#include -#endif - -const unsigned int s_interlace_nb = 8; -const unsigned int s_post_shader_nb = 5; -const unsigned int s_aspect_ratio_nb = 3; - -GSRenderer::GSRenderer() - : m_shader(0) - , m_shift_key(false) - , m_control_key(false) - , m_framelimit(false) - , m_texture_shuffle(false) - , m_wnd(NULL) - , m_dev(NULL) -{ - m_GStitleInfoBuffer[0] = 0; - - m_interlace = theApp.GetConfig("interlace", 7) % s_interlace_nb; - m_aspectratio = theApp.GetConfig("aspectratio", 1) % s_aspect_ratio_nb; - m_shader = theApp.GetConfig("TVShader", 0) % s_post_shader_nb; - m_filter = theApp.GetConfig("filter", 1); - m_vsync = !!theApp.GetConfig("vsync", 0); - m_aa1 = !!theApp.GetConfig("aa1", 0); - m_fxaa = !!theApp.GetConfig("fxaa", 0); - m_shaderfx = !!theApp.GetConfig("shaderfx", 0); - m_shadeboost = !!theApp.GetConfig("ShadeBoost", 0); -} - -GSRenderer::~GSRenderer() -{ - /*if(m_dev) - { - m_dev->Reset(1, 1, GSDevice::Windowed); - }*/ - - delete m_dev; - - if (m_wnd) - { - delete m_wnd; - } -} - -bool GSRenderer::CreateWnd(const string& title, int w, int h) -{ - return m_wnd->Create(title.c_str(), w, h); -} - -bool GSRenderer::CreateDevice(GSDevice* dev) -{ - ASSERT(dev); - ASSERT(!m_dev); - - if(!dev->Create(m_wnd)) - { - return false; - } - - m_dev = dev; - m_dev->SetVSync(m_vsync && m_framelimit); - - return true; -} - -void GSRenderer::ResetDevice() -{ - if(m_dev) m_dev->Reset(1, 1); -} - -bool GSRenderer::Merge(int field) -{ - bool en[2]; - - GSVector4i fr[2]; - GSVector4i dr[2]; - - int baseline = INT_MAX; - - for(int i = 0; i < 2; i++) - { - en[i] = IsEnabled(i); - - if(en[i]) - { - fr[i] = GetFrameRect(i); - dr[i] = GetDisplayRect(i); - - baseline = min(dr[i].top, baseline); - - //printf("[%d]: %d %d %d %d, %d %d %d %d\n", i, fr[i].x,fr[i].y,fr[i].z,fr[i].w , dr[i].x,dr[i].y,dr[i].z,dr[i].w); - } - } - - if(!en[0] && !en[1]) - { - return false; - } - - 
GL_PUSH("Renderer Merge %d", s_n); - - // try to avoid fullscreen blur, could be nice on tv but on a monitor it's like double vision, hurts my eyes (persona 4, guitar hero) - // - // NOTE: probably the technique explained in graphtip.pdf (Antialiasing by Supersampling / 4. Reading Odd/Even Scan Lines Separately with the PCRTC then Blending) - - bool samesrc = - en[0] && en[1] && - m_regs->DISP[0].DISPFB.FBP == m_regs->DISP[1].DISPFB.FBP && - m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && - m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; - - // bool blurdetected = false; - - if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) - { - if(fr[0].eq(fr[1] + GSVector4i(0, -1, 0, 0)) && dr[0].eq(dr[1] + GSVector4i(0, 0, 0, 1)) - || fr[1].eq(fr[0] + GSVector4i(0, -1, 0, 0)) && dr[1].eq(dr[0] + GSVector4i(0, 0, 0, 1))) - { - // persona 4: - // - // fr[0] = 0 0 640 448 - // fr[1] = 0 1 640 448 - // dr[0] = 159 50 779 498 - // dr[1] = 159 50 779 497 - // - // second image shifted up by 1 pixel and blended over itself - // - // god of war: - // - // fr[0] = 0 1 512 448 - // fr[1] = 0 0 512 448 - // dr[0] = 127 50 639 497 - // dr[1] = 127 50 639 498 - // - // same just the first image shifted - - int top = min(fr[0].top, fr[1].top); - int bottom = max(dr[0].bottom, dr[1].bottom); - - fr[0].top = top; - fr[1].top = top; - dr[0].bottom = bottom; - dr[1].bottom = bottom; - - // blurdetected = true; - } - else if(dr[0].eq(dr[1]) && (fr[0].eq(fr[1] + GSVector4i(0, 1, 0, 1)) || fr[1].eq(fr[0] + GSVector4i(0, 1, 0, 1)))) - { - // dq5: - // - // fr[0] = 0 1 512 445 - // fr[1] = 0 0 512 444 - // dr[0] = 127 50 639 494 - // dr[1] = 127 50 639 494 - - int top = min(fr[0].top, fr[1].top); - int bottom = min(fr[0].bottom, fr[1].bottom); - - fr[0].top = fr[1].top = top; - fr[0].bottom = fr[1].bottom = bottom; - - // blurdetected = true; - } - //printf("samesrc = %d blurdetected = %d\n",samesrc,blurdetected); - } - - 
GSVector2i fs(0, 0); - GSVector2i ds(0, 0); - - GSTexture* tex[2] = {NULL, NULL}; - - if(samesrc && fr[0].bottom == fr[1].bottom) - { - tex[0] = GetOutput(0); - tex[1] = tex[0]; // saves one texture fetch - } - else - { - if(en[0]) tex[0] = GetOutput(0); - if(en[1]) tex[1] = GetOutput(1); - } - - GSVector4 src[2]; - GSVector4 dst[2]; - - for(int i = 0; i < 2; i++) - { - if(!en[i] || !tex[i]) continue; - - GSVector4i r = fr[i]; - - // overscan hack - - if(dr[i].height() > 512) // hmm - { - int y = GetDeviceSize(i).y; - r.bottom = r.top + y; - } - - GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); - - src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); - - GSVector2 off(0, 0); - - if(dr[i].top - baseline >= 4) // 2? - { - off.y = tex[i]->GetScale().y * (dr[i].top - baseline); - - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) - { - off.y /= 2; - } - } - - dst[i] = GSVector4(off).xyxy() + scale * GSVector4(r.rsize()); - - fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); - fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); - } - - ds = fs; - - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) - { - ds.y *= 2; - } - - bool slbg = m_regs->PMODE.SLBG; - bool mmod = m_regs->PMODE.MMOD; - - if(tex[0] || tex[1]) - { - if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) - { - // the two outputs are identical, skip drawing one of them (the one that is alpha blended) - - tex[0] = NULL; - } - - GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; - - m_dev->Merge(tex, src, dst, fs, slbg, mmod, c); - - if(m_regs->SMODE2.INT && m_interlace > 0) - { - if (m_interlace == 7 && m_regs->SMODE2.FFMD == 1) // Auto interlace enabled / Odd frame interlace setting - { - int field2 = 0; - int mode = 2; - m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? 
tex[1]->GetScale().y : tex[0]->GetScale().y); - } - else - { - int field2 = 1 - ((m_interlace - 1) & 1); - int mode = (m_interlace - 1) >> 1; - m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); - } - } - - if(m_shadeboost) - { - m_dev->ShadeBoost(); - } - - if (m_shaderfx) - { - m_dev->ExternalFX(); - } - - if(m_fxaa) - { - m_dev->FXAA(); - } - } - - GL_POP(); - - return true; -} - -void GSRenderer::SetFrameLimit(bool limit) -{ - m_framelimit = limit; - - if(m_dev) m_dev->SetVSync(m_vsync && m_framelimit); -} - -void GSRenderer::SetVSync(bool enabled) -{ - m_vsync = enabled; - - if(m_dev) m_dev->SetVSync(m_vsync); -} - -void GSRenderer::VSync(int field) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - m_perfmon.Put(GSPerfMon::Frame); - - Flush(); - - if(!m_dev->IsLost(true)) - { - if(!Merge(field ? 1 : 0)) - { - return; - } - } - else - { - ResetDevice(); - } - - m_dev->AgePool(); - - // osd - - if((m_perfmon.GetFrame() & 0x1f) == 0) - { - m_perfmon.Update(); - - double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); - - GSVector4i r = GetDisplayRect(); - - string s; - -#ifdef GSTITLEINFO_API_FORCE_VERBOSE - if (1)//force verbose reply -#else - if (m_wnd->IsManaged()) -#endif - { - //GSdx owns the window's title, be verbose. - - string s2 = m_regs->SMODE2.INT ? (string("Interlaced ") + (m_regs->SMODE2.FFMD ? 
"(frame)" : "(field)")) : "Progressive"; - - s = format( - "%lld | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d S/%d P/%d D | %d%% CPU | %.2f | %.2f", - m_perfmon.GetFrame(), GetInternalResolution().x, GetInternalResolution().y, fps, (int)(100.0 * fps / GetTvRefreshRate()), - s2.c_str(), - theApp.m_gs_interlace[m_interlace].name.c_str(), - theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), - (int)m_perfmon.Get(GSPerfMon::SyncPoint), - (int)m_perfmon.Get(GSPerfMon::Prim), - (int)m_perfmon.Get(GSPerfMon::Draw), - m_perfmon.CPU(), - m_perfmon.Get(GSPerfMon::Swizzle) / 1024, - m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 - ); - - double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); - - if(fillrate > 0) - { - s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); - - int sum = 0; - - for(int i = 0; i < 16; i++) - { - sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); - } - - s += format(" | %d%% CPU", sum); - } - } - else - { - // Satisfy PCSX2's request for title info: minimal verbosity due to more external title text - - s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); - } - - if(m_capture.IsCapturing()) - { - s += " | Recording..."; - } - - if(m_wnd->IsManaged()) - { - m_wnd->SetWindowText(s.c_str()); - } - else - { - // note: do not use TryEnterCriticalSection. It is unnecessary code complication in - // an area that absolutely does not matter (even if it were 100 times slower, it wouldn't - // be noticeable). Besides, these locks are extremely short -- overhead of conditional - // is way more expensive than just waiting for the CriticalSection in 1 of 10,000,000 tries. 
--air - - std::lock_guard lock(m_pGSsetTitle_Crit); - - strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1); - - m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0; // make sure null terminated even if text overflows - } - } - else - { - // [TODO] - // We don't have window title rights, or the window has no title, - // so let's use actual OSD! - } - - if(m_frameskip) - { - return; - } - - // present - - m_dev->Present(m_wnd->GetClientRect().fit(m_aspectratio), m_shader); - - // snapshot - - if(!m_snapshot.empty()) - { - bool shift = false; - - #ifdef _WIN32 - - shift = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000); - - #else - - shift = m_shift_key; - - #endif - - if(!m_dump && shift) - { - GSFreezeData fd; - fd.size = 0; - fd.data = NULL; - Freeze(&fd, true); - fd.data = new uint8[fd.size]; - Freeze(&fd, false); - - m_dump.Open(m_snapshot, m_crc, fd, m_regs); - - delete [] fd.data; - } - - if(GSTexture* t = m_dev->GetCurrent()) - { - t->Save(m_snapshot + ".bmp", true); - } - - m_snapshot.clear(); - } - else - { - if(m_dump) - { - bool control = false; - - #ifdef _WIN32 - - control = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000); - - #else - - control = m_control_key; - - #endif - - m_dump.VSync(field, !control, m_regs); - } - } - - // capture - - if(m_capture.IsCapturing()) - { - if(GSTexture* current = m_dev->GetCurrent()) - { - GSVector2i size = m_capture.GetSize(); - - if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) - { - GSTexture::GSMap m; - - if(offscreen->Map(m)) - { - m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); - - offscreen->Unmap(); - } - - m_dev->Recycle(offscreen); - } - } - } -} - -bool GSRenderer::MakeSnapshot(const string& path) -{ - if(m_snapshot.empty()) - { - time_t t = time(NULL); - - char buff[16]; - - if(strftime(buff, sizeof(buff), "%Y%m%d%H%M%S", localtime(&t))) - { - m_snapshot = format("%s_%s", path.c_str(), buff); - } - } - - return true; -} - -bool 
GSRenderer::BeginCapture() -{ - GSVector4i disp = m_wnd->GetClientRect().fit(m_aspectratio); - float aspect = (float)disp.width() / max(1, disp.height()); - - return m_capture.BeginCapture(GetTvRefreshRate(), GetInternalResolution(), aspect); -} - -void GSRenderer::EndCapture() -{ - m_capture.EndCapture(); -} - -void GSRenderer::KeyEvent(GSKeyEventData* e) -{ -#ifdef _WIN32 - if(e->type == KEYPRESS) - { - - int step = (::GetAsyncKeyState(VK_SHIFT) & 0x8000) ? -1 : 1; - - switch(e->key) - { - case VK_F5: - m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb; - printf("GSdx: Set deinterlace mode to %d (%s).\n", (int)m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str()); - return; - case VK_F6: - if( m_wnd->IsManaged() ) - m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb; - return; - case VK_F7: - m_shader = (m_shader + s_post_shader_nb + step) % s_post_shader_nb; - printf("GSdx: Set shader to: %d.\n", (int)m_shader); - return; - case VK_DELETE: - m_aa1 = !m_aa1; - printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled"); - return; - case VK_INSERT: - m_mipmap = !m_mipmap; - printf("GSdx: (Software) Mipmapping is now %s.\n", m_mipmap ? "enabled" : "disabled"); - return; - case VK_PRIOR: - m_fxaa = !m_fxaa; - printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled"); - return; - case VK_HOME: - m_shaderfx = !m_shaderfx; - printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled"); - return; - } - - } -#elif defined(__linux__) - if(e->type == KEYPRESS) - { - int step = m_shift_key ? 
-1 : 1; - - switch(e->key) - { - case XK_F5: - m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb; - printf("GSdx: Set deinterlace mode to %d (%s).\n", (int)m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str()); - return; - case XK_F6: - if( m_wnd->IsManaged() ) - m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb; - return; - case XK_F7: - m_shader = (m_shader + s_post_shader_nb + step) % s_post_shader_nb; - printf("GSdx: Set shader %d.\n", (int)m_shader); - return; - case XK_Delete: - m_aa1 = !m_aa1; - printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled"); - return; - case XK_Insert: - m_mipmap = !m_mipmap; - printf("GSdx: (Software) Mipmapping is now %s.\n", m_mipmap ? "enabled" : "disabled"); - return; - case XK_Prior: - m_fxaa = !m_fxaa; - printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled"); - return; - case XK_Home: - m_shaderfx = !m_shaderfx; - printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled"); - return; - case XK_Shift_L: - case XK_Shift_R: - m_shift_key = true; - return; - case XK_Control_L: - case XK_Control_R: - m_control_key = true; - return; - } - - } - else if(e->type == KEYRELEASE) - { - switch(e->key) - { - case XK_Shift_L: - case XK_Shift_R: - m_shift_key = false; - return; - case XK_Control_L: - case XK_Control_R: - m_control_key = false; - return; - } - } -#endif -} diff --git a/plugins/GSdx_legacy/GSRenderer.h b/plugins/GSdx_legacy/GSRenderer.h deleted file mode 100644 index cca70a3486..0000000000 --- a/plugins/GSdx_legacy/GSRenderer.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSdx.h" -#include "GSWnd.h" -#include "GSState.h" -#include "GSCapture.h" - -class GSRenderer : public GSState -{ - GSCapture m_capture; - string m_snapshot; - int m_shader; - - bool Merge(int field); - - // Only used on linux - bool m_shift_key; - bool m_control_key; - -protected: - int m_interlace; - int m_aspectratio; - int m_filter; - bool m_vsync; - bool m_aa1; - bool m_framelimit; - bool m_shaderfx; - bool m_fxaa; - bool m_shadeboost; - bool m_texture_shuffle; - - virtual GSTexture* GetOutput(int i) = 0; - -public: - GSWnd* m_wnd; - GSDevice* m_dev; - -public: - GSRenderer(); - virtual ~GSRenderer(); - - virtual bool CreateWnd(const string& title, int w, int h); - virtual bool CreateDevice(GSDevice* dev); - virtual void ResetDevice(); - virtual void VSync(int field); - virtual bool MakeSnapshot(const string& path); - virtual void KeyEvent(GSKeyEventData* e); - virtual bool CanUpscale() {return false;} - virtual int GetUpscaleMultiplier() {return 1;} - virtual GSVector2i GetInternalResolution() { - return GSVector2i(GetDisplayRect().width(), GetDisplayRect().height()); - } - void SetAspectRatio(int aspect) {m_aspectratio = aspect;} - void SetVSync(bool enabled); - void SetFrameLimit(bool limit); - virtual void SetExclusive(bool isExcl) {} - - virtual bool BeginCapture(); - virtual void EndCapture(); - -public: - std::mutex m_pGSsetTitle_Crit; - - char m_GStitleInfoBuffer[128]; -}; diff --git 
a/plugins/GSdx_legacy/GSRendererCL.cpp b/plugins/GSdx_legacy/GSRendererCL.cpp deleted file mode 100644 index 1f90f71046..0000000000 --- a/plugins/GSdx_legacy/GSRendererCL.cpp +++ /dev/null @@ -1,2248 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererCL.h" - -#ifdef ENABLE_OPENCL - -#define LOG 0 - -static FILE* s_fp = LOG ? 
fopen("c:\\temp1\\_.txt", "w") : NULL; - -#define MAX_FRAME_SIZE 2048 -#define MAX_PRIM_COUNT 4096u -#define MAX_PRIM_PER_BATCH_BITS 5 -#define MAX_PRIM_PER_BATCH (1u << MAX_PRIM_PER_BATCH_BITS) -#define BATCH_COUNT(prim_count) (((prim_count) + (MAX_PRIM_PER_BATCH - 1)) / MAX_PRIM_PER_BATCH) -#define MAX_BATCH_COUNT BATCH_COUNT(MAX_PRIM_COUNT) -#define BIN_SIZE_BITS 4 -#define BIN_SIZE (1u << BIN_SIZE_BITS) -#define MAX_BIN_PER_BATCH ((MAX_FRAME_SIZE / BIN_SIZE) * (MAX_FRAME_SIZE / BIN_SIZE)) -#define MAX_BIN_COUNT (MAX_BIN_PER_BATCH * MAX_BATCH_COUNT) -#define TFX_PARAM_SIZE 2048 -#define TFX_MAX_PARAM_COUNT 256 - -#if MAX_PRIM_PER_BATCH == 64u -#define BIN_TYPE cl_ulong -#elif MAX_PRIM_PER_BATCH == 32u -#define BIN_TYPE cl_uint -#else -#error "MAX_PRIM_PER_BATCH != 32u OR 64u" -#endif - -#pragma pack(push, 1) - -typedef struct -{ - GSVertexCL v[4]; -} gs_prim; - -typedef struct -{ - cl_float4 dx, dy; - cl_float4 zero; - cl_float4 reject_corner; -} gs_barycentric; - -typedef struct -{ - struct { cl_uint first, last; } bounds[MAX_BIN_PER_BATCH]; - BIN_TYPE bin[MAX_BIN_COUNT]; - cl_uchar4 bbox[MAX_PRIM_COUNT]; - gs_prim prim[MAX_PRIM_COUNT]; - gs_barycentric barycentric[MAX_PRIM_COUNT]; -} gs_env; - -#pragma pack(pop) - -GSRendererCL::GSRendererCL() - : m_vb_count(0) - , m_synced(true) -{ - m_nativeres = true; // ignore ini, sw is always native - - memset(m_texture, 0, sizeof(m_texture)); - - m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); - - for(int i = 0; i < 4; i++) - { - m_rw_pages[0][i] = GSVector4i::zero(); - m_rw_pages[1][i] = GSVector4i::zero(); - m_tc_pages[i] = GSVector4i::xffffffff(); - } - - memset(m_rw_pages_rendering, 0, sizeof(m_rw_pages_rendering)); - - #define InitCVB(P) \ - m_cvb[P][0][0] = &GSRendererCL::ConvertVertexBuffer; \ - m_cvb[P][0][1] = &GSRendererCL::ConvertVertexBuffer; \ - m_cvb[P][1][0] = &GSRendererCL::ConvertVertexBuffer; \ - m_cvb[P][1][1] = &GSRendererCL::ConvertVertexBuffer; \ - - InitCVB(GS_POINT_CLASS); - 
InitCVB(GS_LINE_CLASS); - InitCVB(GS_TRIANGLE_CLASS); - InitCVB(GS_SPRITE_CLASS); - - // NOTE: m_cl.vm may be cached on the device according to the specs, there are a couple of places where we access m_mem.m_vm8 without - // mapping the buffer (after the two invalidate* calls and in getoutput), it is currently not an issue, but on some devices it may be. - - m_cl.vm = cl::Buffer(m_cl.context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, (size_t)m_mem.m_vmsize, m_mem.m_vm8, NULL); - m_cl.tex = cl::Buffer(m_cl.context, CL_MEM_READ_ONLY, (size_t)m_mem.m_vmsize); -} - -GSRendererCL::~GSRendererCL() -{ - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - } - - _aligned_free(m_output); -} - -void GSRendererCL::Reset() -{ - Sync(-1); - - GSRenderer::Reset(); -} - -static int pageuploads = 0; -static int pageuploadcount = 0; -static int tfxcount = 0; -static int64 tfxpixels = 0; -static int tfxselcount = 0; -static int tfxdiffselcount = 0; - -void GSRendererCL::VSync(int field) -{ - GSRenderer::VSync(field); - - //printf("vsync %d/%d/%d/%d\n", pageuploads, pageuploadcount, tfxcount, tfxpixels); - //printf("vsync %d/%d\n", tfxselcount, tfxdiffselcount); - pageuploads = pageuploadcount = tfxcount = tfxpixels = 0; - tfxselcount = tfxdiffselcount = 0; - - //if(!field) memset(m_mem.m_vm8, 0, (size_t)m_mem.m_vmsize); -} - -void GSRendererCL::ResetDevice() -{ - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - - m_texture[i] = NULL; - } -} - -GSTexture* GSRendererCL::GetOutput(int i) -{ - const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; - - int w = DISPFB.FBW * 64; - int h = GetFrameRect(i).bottom; - - // TODO: round up bottom - - if(m_dev->ResizeTexture(&m_texture[i], w, h)) - { - static int pitch = 1024 * 4; - - GSVector4i r(0, 0, w, h); - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM]; - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = DISPFB.Block(); - BITBLTBUF.SBW = DISPFB.FBW; - BITBLTBUF.SPSM = 
DISPFB.PSM; - - InvalidateLocalMem(BITBLTBUF, r); - - (m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign(psm.bs), m_output, pitch, m_env.TEXA); - - m_texture[i]->Update(r, m_output, pitch); - - if(s_dump) - { - if(s_save && s_n >= s_saven) - { - m_texture[i]->Save(format("c:\\temp1\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM)); - } - - s_n++; - } - } - - return m_texture[i]; -} - -const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f); - -template -void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) -{ - GSVector4i o = (GSVector4i)m_context->XYOFFSET; - GSVector4 st_scale = GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH, 1, 0); - - for(int i = (int)m_vertex.next; i > 0; i--, src++, dst++) - { - GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q - - GSVector4i xyzuvf(src->m[1]); - - dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints - - GSVector4 t = GSVector4::zero(); - - if(tme) - { - if(fst) - { - #if _M_SSE >= 0x401 - - t = GSVector4(xyzuvf.uph16()); - - #else - - t = GSVector4(GSVector4i::load(src->UV).upl16()); - - #endif - } - else - { - t = stcq.xyww() * st_scale; - } - } - - dst->t = t.insert32<2, 3>(stcq); // color as uchar4 in t.w - } -} - -void GSRendererCL::Draw() -{ - const GSDrawingContext* context = m_context; - - GSVector4i scissor = GSVector4i(context->scissor.in); - GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); - - // points and lines may have zero area bbox (example: single line 0,0->256,0) - - if(m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) - { - if(bbox.x == bbox.z) bbox.z++; - if(bbox.y == bbox.w) bbox.w++; - } - - scissor.z = std::min(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right 
behaviour - - GSVector4i rect = bbox.rintersect(scissor); - - if(rect.rempty()) - { - return; - } - - if(s_dump) - { - Sync(2); - - uint64 frame = m_perfmon.GetFrame(); - - std::string s; - - if(s_save && s_n >= s_saven && PRIM->TME) - { - s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - - m_mem.SaveBMP(s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); - } - - s_n++; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - - m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; - } - - try - { - size_t vb_size = m_vertex.next * sizeof(GSVertexCL); - size_t ib_size = m_index.tail * sizeof(uint32); - size_t pb_size = TFX_PARAM_SIZE; - - ASSERT(sizeof(TFXParameter) <= TFX_PARAM_SIZE); - - if(m_cl.vb.tail + vb_size > m_cl.vb.size || m_cl.ib.tail + ib_size > m_cl.ib.size || m_cl.pb.tail + pb_size > m_cl.pb.size) - { - if(vb_size > m_cl.vb.size || ib_size > m_cl.ib.size) - { - // buffer too small for even one batch, allow twice the size (at least 1 MB) - - Sync(2); // must sync, reallocating the input buffers - - m_cl.Unmap(); - - m_cl.vb.size = 0; - m_cl.ib.size = 0; - - size_t size = std::max(vb_size * 2, (size_t)2 << 20); - - printf("growing vertex/index buffer %d\n", size); - - m_cl.vb.buff[0] = cl::Buffer(m_cl.context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size); - m_cl.vb.buff[1] = cl::Buffer(m_cl.context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size); - m_cl.vb.size = size; - - 
size = std::max(size / sizeof(GSVertex) * 3 * sizeof(uint32), (size_t)1 << 20); // worst case, three times the vertex count - - ASSERT(size >= ib_size); - - if(size < ib_size) size = ib_size; // should not happen - - m_cl.ib.buff[0] = cl::Buffer(m_cl.context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size); - m_cl.ib.buff[1] = cl::Buffer(m_cl.context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, size); - m_cl.ib.size = size; - } - else - { - Enqueue(); - - m_cl.Unmap(); - - // make the write queue wait until the rendering queue is ready, it may still use the device buffers - - std::vector el(1); - - m_cl.queue[2].enqueueMarker(&el[0]); - m_cl.wq->enqueueWaitForEvents(el); - - // switch to the other queue/buffer (double buffering) - - m_cl.wqidx = (m_cl.wqidx + 1) & 1; - m_cl.wq = &m_cl.queue[m_cl.wqidx]; - } - - m_cl.vb.head = m_cl.vb.tail = 0; - m_cl.ib.head = m_cl.ib.tail = 0; - m_cl.pb.head = m_cl.pb.tail = 0; - - m_cl.Map(); - } - else - { - // only allow batches of the same primclass in Enqueue - - if(!m_jobs.empty() && m_jobs.front()->sel.prim != (uint32)m_vt.m_primclass) - { - Enqueue(); - } - } - - // - - GSVertexCL* vb = (GSVertexCL*)(m_cl.vb.ptr + m_cl.vb.tail); - uint32* ib = (uint32*)(m_cl.ib.ptr + m_cl.ib.tail); - TFXParameter* pb = (TFXParameter*)(m_cl.pb.ptr + m_cl.pb.tail); - - (this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST])(vb, m_vertex.buff, m_vertex.next); // TODO: upload in GSVertex format and extract the fields in the kernel? 
- - if(m_jobs.empty()) - { - memcpy(ib, m_index.buff, m_index.tail * sizeof(uint32)); - - m_vb_start = m_cl.vb.tail; - m_vb_count = 0; - m_pb_start = m_cl.pb.tail; - m_pb_count = 0; - } - else - { - // TODO: SIMD - - ASSERT(m_pb_count < TFX_MAX_PARAM_COUNT); - - uint32 vb_count = m_vb_count | (m_pb_count << 24); - - for(size_t i = 0; i < m_index.tail; i++) - { - ib[i] = m_index.buff[i] + vb_count; - } - } - - shared_ptr job(new TFXJob()); - - if(!SetupParameter(job.get(), pb, vb, m_vertex.next, m_index.buff, m_index.tail)) - { - return; - } - - pb->scissor = scissor; - - if(bbox.eq(bbox.rintersect(scissor))) - { - pb->sel.noscissor = 1; - } - - job->rect.x = rect.x; - job->rect.y = rect.y; - job->rect.z = rect.z; - job->rect.w = rect.w; - job->sel = pb->sel; - job->ib_start = m_cl.ib.tail; - job->prim_count = m_index.tail / GSUtil::GetClassVertexCount(m_vt.m_primclass); - job->fbp = pb->fbp; - job->zbp = pb->zbp; - job->bw = pb->bw; - job->fpsm = context->FRAME.PSM; - job->zpsm = context->ZBUF.PSM; - job->tpsm = context->TEX0.PSM; - -#ifdef DEBUG - job->pb = pb; -#endif - m_jobs.push_back(job); - - m_vb_count += m_vertex.next; - m_pb_count++; - - m_cl.vb.tail += vb_size; - m_cl.ib.tail += ib_size; - m_cl.pb.tail += pb_size; - - m_synced = false; - - // mark pages used in rendering as source or target - - if(job->sel.fwrite || job->sel.rfb) - { - m_context->offset.fb->GetPagesAsBits(rect, m_tmp_pages); - - if(job->sel.rfb) - { - for(int i = 0; i < 4; i++) - { - m_rw_pages[0][i] |= m_tmp_pages[i]; - } - } - - if(job->sel.fwrite) - { - GSVector4i* dst_pages = job->GetDstPages(); - - for(int i = 0; i < 4; i++) - { - m_rw_pages[1][i] |= m_tmp_pages[i]; - - dst_pages[i] |= m_tmp_pages[i]; - } - } - } - - if(job->sel.zwrite || job->sel.rzb) - { - m_context->offset.zb->GetPagesAsBits(rect, m_tmp_pages); - - if(job->sel.rzb) - { - for(int i = 0; i < 4; i++) - { - m_rw_pages[0][i] |= m_tmp_pages[i]; - } - } - - if(job->sel.zwrite) - { - GSVector4i* dst_pages = 
job->GetDstPages(); - - for(int i = 0; i < 4; i++) - { - m_rw_pages[1][i] |= m_tmp_pages[i]; - - dst_pages[i] |= m_tmp_pages[i]; - } - } - } - - if(job->src_pages != NULL) - { - for(int i = 0; i < 4; i++) - { - m_rw_pages[0][i] |= job->src_pages[i]; - - if(job->dst_pages != NULL && !(job->dst_pages[i] & job->src_pages[i]).eq(GSVector4i::zero())) - { - //printf("src and dst overlap!\n"); - } - } - } - - // don't buffer too much data, feed them to the device if there is enough - - if(m_pb_count >= TFX_MAX_PARAM_COUNT || m_vb_count >= 4096) - { - Enqueue(); - } - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - - return; - } - catch(std::exception err) - { - printf("%s\n", err.what()); - - return; - } - - if(s_dump) - { - Sync(2); - - uint64 frame = m_perfmon.GetFrame(); - - std::string s; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - - m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; - } -} - -void GSRendererCL::Sync(int reason) -{ - if(LOG) { fprintf(s_fp, "Sync (%d)\n", reason); fflush(s_fp); } - - //printf("sync %d\n", reason); - - GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); - - Enqueue(); - - m_cl.queue[2].finish(); - - for(int i = 0; i < 4; i++) - { - m_rw_pages[0][i] = GSVector4i::zero(); - m_rw_pages[1][i] = GSVector4i::zero(); - } - - for(int i = 0; i < MAX_PAGES; i++) ASSERT(m_rw_pages_rendering[i] == 0); - - m_synced = true; -} - -void GSRendererCL::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) -{ - if(LOG) {fprintf(s_fp, "w %05x %d 
%d, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} - - GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); - - o->GetPagesAsBits(r, m_tmp_pages); - - if(!m_synced) - { - int i = 0; - - bool wait; - - do - { - wait = false; - - for(; i < 4; i++) - { - GSVector4i pages = m_rw_pages[0][i] | m_rw_pages[1][i]; - - if(!(pages & m_tmp_pages[i]).eq(GSVector4i::zero())) - { - // TODO: an awesome idea to avoid this Sync - // - call Enqueue() to flush m_jobs - // - append rendering queue with a kernel that writes the incoming data to m_mem.vm and tell the parent class to not do it - // - the only problem, clut has to be read directly by the texture sampler, can't attach it to gs_param before being written - - //Sync(3); - - Enqueue(); - - wait = true; - - break; - } - } - - _mm_pause(); - } - while(wait); - - if(!m_synced) - { - o->GetPages(r, m_tmp_pages2); // TODO: don't ask twice - - const uint32* p = m_tmp_pages2; - - do - { - wait = false; - - for(; *p != GSOffset::EOP; p++) - { - if(m_rw_pages_rendering[*p]) - { - // Sync(5); - - wait = true; - - break; - } - } - /* - if(!m_synced) - { - void* ptr = m_cl.wq->enqueueMapBuffer(m_cl.vm, CL_TRUE, CL_MAP_READ, 0, m_mem.m_vmsize); - m_cl.wq->enqueueUnmapMemObject(m_cl.vm, ptr); - } - */ - - _mm_pause(); - } - while(wait); - } - } - - for(int i = 0; i < 4; i++) - { - m_tc_pages[i] |= m_tmp_pages[i]; - } -} - -void GSRendererCL::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) -{ - if(LOG) {fprintf(s_fp, "%s %05x %d %d, %d %d %d %d\n", clut ? 
"rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} - - if(!m_synced) - { - GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); - - o->GetPagesAsBits(r, m_tmp_pages); - - for(int i = 0; i < 4; i++) - { - GSVector4i pages = m_rw_pages[1][i]; - - if(!(pages & m_tmp_pages[i]).eq(GSVector4i::zero())) - { - Sync(4); - - break; - } - } - - if(!m_synced) - { - o->GetPages(r, m_tmp_pages2); // TODO: don't ask twice - - for(const uint32* p = m_tmp_pages2; *p != GSOffset::EOP; p++) - { - if(m_rw_pages_rendering[*p] & 0xffff0000) - { - Sync(6); - - break; - } - } - /* - if(!m_synced) - { - void* ptr = m_cl.wq->enqueueMapBuffer(m_cl.vm, CL_TRUE, CL_MAP_READ, 0, m_mem.m_vmsize); - m_cl.wq->enqueueUnmapMemObject(m_cl.vm, ptr); - } - */ - } - } -} - -typedef struct { GSRendererCL* r; uint32 pages[(MAX_PAGES + 1) * 2]; } cb_data; - -void GSRendererCL::Enqueue() -{ - if(m_jobs.empty()) return; - - cb_data* data = new cb_data(); - - data->r = this; - - UsePages(data->pages); - - try - { - ASSERT(m_cl.vb.tail > m_cl.vb.head); - ASSERT(m_cl.ib.tail > m_cl.ib.head); - ASSERT(m_cl.pb.tail > m_cl.pb.head); - - int primclass = m_jobs.front()->sel.prim; - - uint32 n = GSUtil::GetClassVertexCount(primclass); - - PrimSelector psel; - - psel.key = 0; - psel.prim = primclass; - - cl::Kernel& pk = m_cl.GetPrimKernel(psel); - - pk.setArg(1, m_cl.vb.buff[m_cl.wqidx]); - pk.setArg(2, m_cl.ib.buff[m_cl.wqidx]); - pk.setArg(3, m_cl.pb.buff[m_cl.wqidx]); - pk.setArg(4, (cl_uint)m_vb_start); - pk.setArg(6, (cl_uint)m_pb_start); - - TileSelector tsel; - - tsel.key = 0; - tsel.prim = primclass; - - tsel.mode = 0; - - cl::Kernel& tk_32 = m_cl.GetTileKernel(tsel); - - tsel.mode = 1; - - cl::Kernel& tk_16 = m_cl.GetTileKernel(tsel); - - tsel.mode = 2; - - cl::Kernel& tk_8 = m_cl.GetTileKernel(tsel); - - tsel.mode = 3; - - cl::Kernel& tk = m_cl.GetTileKernel(tsel); - - tsel.key = 0; - tsel.clear = 1; - - cl::Kernel& tk_clear = 
m_cl.GetTileKernel(tsel); - - // - - m_cl.Unmap(); - - std::vector el(1); - - m_cl.wq->enqueueMarker(&el[0]); - m_cl.queue[2].enqueueWaitForEvents(el); - - // - - auto head = m_jobs.begin(); - - while(head != m_jobs.end()) - { - uint32 total_prim_count = 0; - - auto next = head; - - while(next != m_jobs.end()) - { - auto job = next++; - - uint32 cur_prim_count = (*job)->prim_count; - uint32 next_prim_count = next != m_jobs.end() ? (*next)->prim_count : 0; - - total_prim_count += cur_prim_count; - - if(total_prim_count >= MAX_PRIM_COUNT || next == m_jobs.end())// || next_prim_count >= MAX_PRIM_COUNT || next_prim_count < 16 && total_prim_count >= MAX_PRIM_COUNT / 2) - { - uint32 prim_count = std::min(total_prim_count, MAX_PRIM_COUNT); - - pk.setArg(5, (cl_uint)(*head)->ib_start); - - m_cl.queue[2].enqueueNDRangeKernel(pk, cl::NullRange, cl::NDRange(prim_count), cl::NullRange); - - if(0) - { - gs_env* ptr = (gs_env*)m_cl.queue[2].enqueueMapBuffer(m_cl.env, CL_TRUE, CL_MAP_READ, 0, sizeof(gs_env)); - m_cl.queue[2].enqueueUnmapMemObject(m_cl.env, ptr); - } - - GSVector4i rect = GSVector4i::zero(); - - for(auto i = head; i != next; i++) - { - rect = rect.runion(GSVector4i::load(&(*i)->rect)); - } - - rect = rect.ralign(GSVector2i(BIN_SIZE, BIN_SIZE)) >> BIN_SIZE_BITS; - - int bin_w = rect.width(); - int bin_h = rect.height(); - - uint32 batch_count = BATCH_COUNT(prim_count); - uint32 bin_count = bin_w * bin_h; - - cl_uchar4 bin_dim; - - bin_dim.s[0] = (cl_uchar)rect.x; - bin_dim.s[1] = (cl_uchar)rect.y; - bin_dim.s[2] = (cl_uchar)bin_w; - bin_dim.s[3] = (cl_uchar)bin_h; - - if(1)//bin_w > 1 || bin_h > 1) // && not just one sprite covering the whole area - { - m_cl.queue[2].enqueueNDRangeKernel(tk_clear, cl::NullRange, cl::NDRange(bin_count), cl::NullRange); - - if(bin_count <= 32 && m_cl.WIs >= 256) - { - uint32 item_count; - uint32 group_count; - cl::Kernel* k; - - if(bin_count <= 8) - { - item_count = std::min(prim_count, 32u); - group_count = ((prim_count + 31) >> 5) 
* item_count; - k = &tk_32; - } - else if(bin_count <= 16) - { - item_count = std::min(prim_count, 16u); - group_count = ((prim_count + 15) >> 4) * item_count; - k = &tk_16; - } - else - { - item_count = std::min(prim_count, 8u); - group_count = ((prim_count + 7) >> 3) * item_count; - k = &tk_8; - } - - k->setArg(1, (cl_uint)prim_count); - k->setArg(2, (cl_uint)bin_count); - k->setArg(3, bin_dim); - - m_cl.queue[2].enqueueNDRangeKernel(*k, cl::NullRange, cl::NDRange(bin_w, bin_h, group_count), cl::NDRange(bin_w, bin_h, item_count)); - } - else - { - uint32 item_count = std::min(bin_count, m_cl.WIs); - uint32 group_count = batch_count * item_count; - - tk.setArg(1, (cl_uint)prim_count); - tk.setArg(2, (cl_uint)bin_count); - tk.setArg(3, bin_dim); - - m_cl.queue[2].enqueueNDRangeKernel(tk, cl::NullRange, cl::NDRange(group_count), cl::NDRange(item_count)); - } - - if(0) - { - gs_env* ptr = (gs_env*)m_cl.queue[2].enqueueMapBuffer(m_cl.env, CL_TRUE, CL_MAP_READ, 0, sizeof(gs_env)); - m_cl.queue[2].enqueueUnmapMemObject(m_cl.env, ptr); - } - } - - std::list> jobs(head, next); - - JoinTFX(jobs); - - EnqueueTFX(jobs, bin_count, bin_dim); - - if(total_prim_count > MAX_PRIM_COUNT) - { - prim_count = cur_prim_count - (total_prim_count - MAX_PRIM_COUNT); - - (*job)->ib_start += prim_count * n * sizeof(uint32); - (*job)->prim_count -= prim_count; - - next = job; // try again for the remainder - - //printf("split %d\n", (*job)->prim_count); - } - - break; - } - } - - head = next; - } - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - } - - try - { - cl::Event e; - m_cl.queue[2].enqueueMarker(&e); - e.setCallback(CL_COMPLETE, ReleasePageEvent, data); - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - - delete data; - } - - m_jobs.clear(); - - m_vb_count = 0; - - m_cl.vb.head = m_cl.vb.tail; - m_cl.ib.head = m_cl.ib.tail; - m_cl.pb.head = m_cl.pb.tail; - - m_cl.Map(); -} - -void GSRendererCL::EnqueueTFX(std::list>& jobs, 
uint32 bin_count, const cl_uchar4& bin_dim) -{ - cl_kernel tfx_prev = NULL; - - uint32 prim_start = 0; - - for(auto i : jobs) - { - ASSERT(prim_start < MAX_PRIM_COUNT); - - tfxcount++; - - uint32 prim_count = std::min(i->prim_count, MAX_PRIM_COUNT - prim_start); - - cl::Kernel& tfx = m_cl.GetTFXKernel(i->sel); - - cl::Buffer* tex = UpdateTextureCache(i.get()) ? &m_cl.tex : &m_cl.vm; - - tfx.setArg(2, sizeof(*tex), tex); - - if(tfx_prev != tfx()) - { - tfx.setArg(3, sizeof(m_cl.pb.buff[m_cl.wqidx]), &m_cl.pb.buff[m_cl.wqidx]); - tfx.setArg(4, (cl_uint)m_pb_start); - - tfx_prev = tfx(); - } - - tfx.setArg(5, (cl_uint)prim_start); - tfx.setArg(6, (cl_uint)prim_count); - tfx.setArg(7, (cl_uint)bin_count); - tfx.setArg(8, bin_dim); - tfx.setArg(9, i->fbp); - tfx.setArg(10, i->zbp); - tfx.setArg(11, i->bw); - - GSVector4i r = GSVector4i::load(&i->rect); - - r = r.ralign(GSVector2i(8, 8)); - - m_cl.queue[2].enqueueNDRangeKernel(tfx, cl::NDRange(r.left, r.top), cl::NDRange(r.width(), r.height()), cl::NDRange(8, 8)); - - tfxpixels += r.width() * r.height(); - - InvalidateTextureCache(i.get()); - - prim_start += prim_count; - } -} - -void GSRendererCL::JoinTFX(std::list>& jobs) -{ - // join tfx kernel calls where the selector and fbp/zbp/bw/fpsm/zpsm are the same and src_pages != prev dst_pages - - //printf("before\n"); for(auto i : jobs) printf("%016llx %05x %05x %d %d %d\n", i->sel.key, i->fbp, i->zbp, i->bw, i->prim_count, i->ib_start); - - tfxselcount += jobs.size(); - - auto next = jobs.begin(); - - while(next != jobs.end()) - { - auto prev = next++; - - if(next == jobs.end()) - { - break; - } - - TFXSelector prev_sel = (*prev)->sel; - TFXSelector next_sel = (*next)->sel; - - prev_sel.ababcd = next_sel.ababcd = 0; - prev_sel.wms = next_sel.wms = 0; - prev_sel.wmt = next_sel.wmt = 0; - prev_sel.noscissor = next_sel.noscissor = prev_sel.noscissor | next_sel.noscissor; - prev_sel.merged = next_sel.merged = 0; - - if(prev_sel != next_sel - || (*prev)->fbp != (*next)->fbp - 
|| (*prev)->zbp != (*next)->zbp - || (*prev)->bw != (*next)->bw - || (*prev)->fpsm != (*next)->fpsm - || (*prev)->zpsm != (*next)->zpsm) - { - continue; - } - - if((*prev)->dst_pages != NULL && (*next)->src_pages != NULL) - { - bool overlap = false; - - for(int i = 0; i < 4; i++) - { - if(!((*prev)->dst_pages[i] & (*next)->src_pages[i]).eq(GSVector4i::zero())) - { - overlap = true; - - break; - } - } - - if(overlap) - { - continue; - } - } - - if((*prev)->src_pages != NULL) - { - GSVector4i* src_pages = (*next)->GetSrcPages(); - - for(int i = 0; i < 4; i++) - { - src_pages[i] |= (*prev)->src_pages[i]; - } - } - - if((*prev)->dst_pages != NULL) - { - GSVector4i* dst_pages = (*next)->GetDstPages(); - - for(int i = 0; i < 4; i++) - { - dst_pages[i] |= (*prev)->dst_pages[i]; - } - } - - GSVector4i prev_rect = GSVector4i::load(&(*prev)->rect); - GSVector4i next_rect = GSVector4i::load(&(*next)->rect); - - GSVector4i::store(&(*next)->rect, prev_rect.runion(next_rect)); - - (*next)->prim_count += (*prev)->prim_count; - (*next)->ib_start = (*prev)->ib_start; - - (*next)->sel = next_sel; - (*next)->sel.merged = 1; - - jobs.erase(prev); - - //if((*prev)->sel != (*next)->sel) printf("%d %016llx %016llx\n", jobs.size(), (*prev)->sel.key, (*next)->sel.key); - } - - tfxdiffselcount += jobs.size(); - - //printf("after\n"); for(auto i : jobs) printf("%016llx %05x %05x %d %d %d\n", i->sel.key, i->fbp, i->zbp, i->bw, i->prim_count, i->ib_start); -} - -bool GSRendererCL::UpdateTextureCache(TFXJob* job) -{ - if(job->src_pages == NULL) return false; - - bool overlap = false; - bool invalid = false; - - if(job->dst_pages != NULL) - { - bool can_overlap = job->sel.fwrite && GSUtil::HasSharedBits(job->tpsm, job->fpsm) || job->sel.zwrite && GSUtil::HasSharedBits(job->tpsm, job->zpsm); - - for(int i = 0; i < 4; i++) - { - if(!(job->src_pages[i] & job->dst_pages[i]).eq(GSVector4i::zero())) - { - overlap = can_overlap; // gow, re4 - } - - if(!(m_tc_pages[i] & 
job->src_pages[i]).eq(GSVector4i::zero())) - { - invalid = true; - } - } - } - - if(!invalid) - { - return true; // all needed pages are valid in texture cache, use it - } - - if(!overlap) - { - return false; // no overlap, but has invalid pages, don't use texture cache - } - - // overlap && invalid, update and use texture cache - - int count = 0; - - for(int i = 0; i < 4; i++) - { - GSVector4i pages = m_tc_pages[i] & job->src_pages[i]; - - if(pages.eq(GSVector4i::zero())) continue; - - m_tc_pages[i] &= ~job->src_pages[i]; - - for(int j = 0; j < 4; j++) - { - if(pages.u32[j] == 0) continue; - - if(pages.u32[j] == 0xffffffff) - { - size_t offset = (i * sizeof(GSVector4i) + j * sizeof(uint32)) * 8 * PAGE_SIZE; - - m_cl.queue[2].enqueueCopyBuffer(m_cl.vm, m_cl.tex, offset, offset, PAGE_SIZE * 32); - - if(LOG) { fprintf(s_fp, "tc (%d x32)\n", offset >> 13); fflush(s_fp); } - - pageuploadcount++; - count += 32; - - continue; - } - - for(int k = 0; k < 4; k++) - { - uint8 b = pages.u8[j * 4 + k]; - - if(b == 0) continue; - - if(b == 0xff) - { - size_t offset = (i * sizeof(GSVector4i) + (j * 4 + k)) * 8 * PAGE_SIZE; - - m_cl.queue[2].enqueueCopyBuffer(m_cl.vm, m_cl.tex, offset, offset, PAGE_SIZE * 8); - - if(LOG) { fprintf(s_fp, "tc (%d x8)\n", offset >> 13); fflush(s_fp); } - - pageuploadcount++; - count += 8; - - continue; - } - - for(int l = 0; l < 8; l++) - { - if(b & (1 << l)) - { - size_t offset = ((i * sizeof(GSVector4i) + (j * 4 + k)) * 8 + l) * PAGE_SIZE; - - m_cl.queue[2].enqueueCopyBuffer(m_cl.vm, m_cl.tex, offset, offset, PAGE_SIZE); - - if(LOG) { fprintf(s_fp, "tc (%d x1)\n", offset >> 13); fflush(s_fp); } - - pageuploadcount++; - count++; - } - } - } - } - } - - if(count > 0) - { - pageuploads += count; - } - - return true; -} - -void GSRendererCL::InvalidateTextureCache(TFXJob* job) -{ - if(job->dst_pages == NULL) return; - - for(int i = 0; i < 4; i++) - { - m_tc_pages[i] |= job->dst_pages[i]; - } -} - -void GSRendererCL::UsePages(uint32* p) -{ - for(int l 
= 0; l < 2; l++) - { - for(int i = 0; i < 4; i++) - { - GSVector4i* v = &m_rw_pages[l][i]; - - if(v->eq(GSVector4i::zero())) continue; - - for(int j = 0; j < 4; j++) - { - unsigned long index; - unsigned long mask = v->u32[j]; - - if(mask == 0) continue; - - int o = (i << 7) | (j << 5); - - if(mask == 0xffffffff) - { - for(int index = 0; index < 32; index++) - { - _InterlockedIncrement16((short*)&m_rw_pages_rendering[index | o] + l); - - *p++ = index | o; - } - } - else - { - while(_BitScanForward(&index, mask)) - { - mask &= ~(1 << index); - - _InterlockedIncrement16((short*)&m_rw_pages_rendering[index | o] + l); - - *p++ = index | o; - } - } - } - - *v = GSVector4i::zero(); - } - - *p++ = GSOffset::EOP; - } -} - -void GSRendererCL::ReleasePages(uint32* pages) -{ - const uint32* p = pages; - - for(; *p != GSOffset::EOP; p++) - { - _InterlockedDecrement16((short*)&m_rw_pages_rendering[*p] + 0); - } - - p++; - - for(; *p != GSOffset::EOP; p++) - { - _InterlockedDecrement16((short*)&m_rw_pages_rendering[*p] + 1); - } -} - -void CL_CALLBACK GSRendererCL::ReleasePageEvent(cl_event event, cl_int event_command_exec_status, void* user_data) -{ - if(event_command_exec_status == CL_COMPLETE) - { - cb_data* data = (cb_data*)user_data; - - data->r->ReleasePages(data->pages); - - delete data; - } -} - -static int RemapPSM(int psm) -{ - switch(psm) - { - default: - case PSM_PSMCT32: psm = 0; break; - case PSM_PSMCT24: psm = 1; break; - case PSM_PSMCT16: psm = 2; break; - case PSM_PSMCT16S: psm = 3; break; - case PSM_PSMZ32: psm = 4; break; - case PSM_PSMZ24: psm = 5; break; - case PSM_PSMZ16: psm = 6; break; - case PSM_PSMZ16S: psm = 7; break; - case PSM_PSMT8: psm = 8; break; - case PSM_PSMT4: psm = 9; break; - case PSM_PSMT8H: psm = 10; break; - case PSM_PSMT4HL: psm = 11; break; - case PSM_PSMT4HH: psm = 12; break; - } - - return psm; -} - -bool GSRendererCL::SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t 
index_count) -{ - const GSDrawingEnvironment& env = m_env; - const GSDrawingContext* context = m_context; - const GS_PRIM_CLASS primclass = m_vt.m_primclass; - - TFXSelector sel; - - sel.key = 0; - - sel.atst = ATST_ALWAYS; - sel.tfx = TFX_NONE; - sel.ababcd = 0xff; - sel.prim = primclass; - - uint32 fm = context->FRAME.FBMSK; - uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; - - if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) - { - fm = 0xffffffff; - zm = 0xffffffff; - } - - if(PRIM->TME) - { - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - } - } - - if(context->TEST.ATE) - { - if(!TryAlphaTest(fm, zm)) - { - sel.atst = context->TEST.ATST; - sel.afail = context->TEST.AFAIL; - pb->aref = context->TEST.AREF; - - switch(sel.atst) - { - case ATST_LESS: - sel.atst = ATST_LEQUAL; - pb->aref--; - break; - case ATST_GREATER: - sel.atst = ATST_GEQUAL; - pb->aref++; - break; - } - } - } - - bool fwrite; - bool zwrite = zm != 0xffffffff; - - switch(context->FRAME.PSM) - { - default: - case PSM_PSMCT32: - case PSM_PSMZ32: - fwrite = fm != 0xffffffff; - break; - case PSM_PSMCT24: - case PSM_PSMZ24: - fwrite = (fm & 0x00ffffff) != 0x00ffffff; - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - fwrite = (fm & 0x80f8f8f8) != 0x80f8f8f8; - break; - } - - if(!fwrite && !zwrite) return false; - - bool ftest = sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS; - - sel.fwrite = fwrite; - sel.ftest = ftest; - sel.zwrite = zwrite; - sel.ztest = ztest; - - if(fwrite || ftest) - { - sel.fpsm = RemapPSM(context->FRAME.PSM); - - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) - { - sel.iip = PRIM->IIP; - } - - if(PRIM->TME) - { - sel.tfx = context->TEX0.TFX; - sel.tcc = context->TEX0.TCC; - sel.fst = PRIM->FST; 
- sel.ltf = m_vt.IsLinear(); - sel.tpsm = RemapPSM(context->TEX0.PSM); - sel.aem = m_env.TEXA.AEM; - - pb->tbp[0] = context->TEX0.TBP0; - pb->tbw[0] = context->TEX0.TBW; - pb->ta0 = m_env.TEXA.TA0; - pb->ta1 = m_env.TEXA.TA1; - - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - sel.tlu = 1; - - memcpy(pb->clut, (const uint32*)m_mem.m_clut, sizeof(uint32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal); - } - - sel.wms = ((uint32)context->CLAMP.WMS + 1) & 3; - sel.wmt = ((uint32)context->CLAMP.WMT + 1) & 3; - - if(sel.tfx == TFX_MODULATE && sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) - { - // modulate does not do anything when vertex color is 0x80 - - sel.tfx = TFX_DECAL; - } - - bool mipmap = IsMipMapActive(); - - GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); - - GSVector4i r; - - GetTextureMinMax(r, TEX0, context->CLAMP, sel.ltf); - - GSVector4i* src_pages = job->GetSrcPages(); - - GSOffset* o = m_mem.GetOffset(context->TEX0.TBP0, context->TEX0.TBW, context->TEX0.PSM); - - o->GetPagesAsBits(r, m_tmp_pages); - - for(int i = 0; i < 4; i++) - { - src_pages[i] |= m_tmp_pages[i]; - } - - if(mipmap) - { - // TEX1.MMIN - // 000 p - // 001 l - // 010 p round - // 011 p tri - // 100 l round - // 101 l tri - - if(m_vt.m_lod.x > 0) - { - sel.ltf = context->TEX1.MMIN >> 2; - } - else - { - // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0 - } - - sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri - sel.lcm = context->TEX1.LCM; - - int mxl = std::min((int)context->TEX1.MXL, 6) << 16; - int k = context->TEX1.K << 12; - - if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) - { - k = (int)m_vt.m_lod.x << 16; // set lod to max level - - sel.lcm = 1; // lod is constant - sel.mmin = 1; // tri-linear is meaningless - } - - if(sel.mmin == 2) - { - mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?) 
- } - - if(sel.fst) - { - ASSERT(sel.lcm == 1); - ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) - - sel.lcm = 1; - } - - if(sel.lcm) - { - int lod = std::max(std::min(k, mxl), 0); - - if(sel.mmin == 1) - { - lod = (lod + 0x8000) & 0xffff0000; // rounding - } - - pb->lod = lod; - - // TODO: lot to optimize when lod is constant - } - else - { - pb->mxl = mxl; - pb->l = (float)(-0x10000 << context->TEX1.L); - pb->k = (float)k; - } - - GIFRegTEX0 MIP_TEX0 = TEX0; - GIFRegCLAMP MIP_CLAMP = context->CLAMP; - - GSVector4 tmin = m_vt.m_min.t; - GSVector4 tmax = m_vt.m_max.t; - - static int s_counter = 0; - - for(int i = 1, j = std::min((int)context->TEX1.MXL, 6); i <= j; i++) - { - switch(i) - { - case 1: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP1; - MIP_TEX0.TBW = context->MIPTBP1.TBW1; - break; - case 2: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP2; - MIP_TEX0.TBW = context->MIPTBP1.TBW2; - break; - case 3: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP3; - MIP_TEX0.TBW = context->MIPTBP1.TBW3; - break; - case 4: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP4; - MIP_TEX0.TBW = context->MIPTBP2.TBW4; - break; - case 5: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP5; - MIP_TEX0.TBW = context->MIPTBP2.TBW5; - break; - case 6: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP6; - MIP_TEX0.TBW = context->MIPTBP2.TBW6; - break; - default: - __assume(0); - } - - pb->tbp[i] = MIP_TEX0.TBP0; - pb->tbw[i] = MIP_TEX0.TBW; - - if(MIP_TEX0.TW > 0) MIP_TEX0.TW--; - if(MIP_TEX0.TH > 0) MIP_TEX0.TH--; - - MIP_CLAMP.MINU >>= 1; - MIP_CLAMP.MINV >>= 1; - MIP_CLAMP.MAXU >>= 1; - MIP_CLAMP.MAXV >>= 1; - - m_vt.m_min.t *= 0.5f; - m_vt.m_max.t *= 0.5f; - - GSVector4i r; - - GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, sel.ltf); - - GSOffset* o = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM); - - o->GetPagesAsBits(r, m_tmp_pages); - - for(int i = 0; i < 4; i++) - { - src_pages[i] |= m_tmp_pages[i]; - } - } - - s_counter++; - - m_vt.m_min.t = tmin; - m_vt.m_max.t 
= tmax; - } - else - { - if(sel.fst == 0) - { - // skip per pixel division if q is constant - - GSVertexCL* RESTRICT v = vertex; - - if(m_vt.m_eq.q) - { - sel.fst = 1; - - const GSVector4& t = v[index[0]].t; - - if(t.z != 1.0f) - { - GSVector4 w = t.zzzz().rcpnr(); - - for(int i = 0, j = vertex_count; i < j; i++) - { - GSVector4 t = v[i].t; - - v[i].t = (t * w).xyzw(t); - } - } - } - else if(primclass == GS_SPRITE_CLASS) - { - sel.fst = 1; - - for(int i = 0, j = vertex_count; i < j; i += 2) - { - GSVector4 t0 = v[i + 0].t; - GSVector4 t1 = v[i + 1].t; - - GSVector4 w = t1.zzzz().rcpnr(); - - v[i + 0].t = (t0 * w).xyzw(t0); - v[i + 1].t = (t1 * w).xyzw(t1); - } - } - } - } - - int tw = 1 << TEX0.TW; - int th = 1 << TEX0.TH; - - switch(context->CLAMP.WMS) - { - case CLAMP_REPEAT: - pb->minu = tw - 1; - pb->maxu = 0; - //gd.t.mask.u32[0] = 0xffffffff; - break; - case CLAMP_CLAMP: - pb->minu = 0; - pb->maxu = tw - 1; - //gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_CLAMP: - pb->minu = std::min((int)context->CLAMP.MINU, tw - 1); - pb->maxu = std::min((int)context->CLAMP.MAXU, tw - 1); - //gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_REPEAT: - pb->minu = (int)context->CLAMP.MINU & (tw - 1); - pb->maxu = (int)context->CLAMP.MAXU & (tw - 1); - //gd.t.mask.u32[0] = 0xffffffff; - break; - default: - __assume(0); - } - - switch(context->CLAMP.WMT) - { - case CLAMP_REPEAT: - pb->minv = th - 1; - pb->maxv = 0; - //gd.t.mask.u32[2] = 0xffffffff; - break; - case CLAMP_CLAMP: - pb->minv = 0; - pb->maxv = th - 1; - //gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_CLAMP: - pb->minv = std::min((int)context->CLAMP.MINV, th - 1); - pb->maxv = std::min((int)context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) - //gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_REPEAT: - pb->minv = (int)context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 - pb->maxv = (int)context->CLAMP.MAXV & (th - 
1); - //gd.t.mask.u32[2] = 0xffffffff; - break; - default: - __assume(0); - } - } - - if(PRIM->FGE) - { - sel.fge = 1; - pb->fog = env.FOGCOL.u32[0]; - } - - if(context->FRAME.PSM != PSM_PSMCT24) - { - sel.date = context->TEST.DATE; - sel.datm = context->TEST.DATM; - } - - if(!IsOpaque()) - { - sel.abe = PRIM->ABE; - sel.ababcd = context->ALPHA.u32[0]; - - if(env.PABE.PABE) - { - sel.pabe = 1; - } - - if(m_aa1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) - { - sel.aa1 = 1; - } - - pb->afix = context->ALPHA.FIX; - } - - if(sel.date || sel.aba == 1 || sel.abb == 1 || sel.abc == 1 && (sel.fpsm & 3) != 1 || sel.abd == 1) - { - sel.rfb = 1; - } - else - { - if(fwrite) - { - if(sel.atst != ATST_ALWAYS && sel.afail == AFAIL_RGB_ONLY - || (sel.fpsm & 3) == 0 && fm != 0 - || (sel.fpsm & 3) == 1 // always read-merge-write 24bpp, regardless the mask - || (sel.fpsm & 3) >= 2 && (fm & 0x80f8f8f8) != 0) - { - sel.rfb = 1; - } - } - } - - sel.colclamp = env.COLCLAMP.CLAMP; - sel.fba = context->FBA.FBA; - - if(env.DTHE.DTHE) - { - sel.dthe = 1; - - GSVector4i dimx0 = env.dimx[1].sll32(16).sra32(16); - GSVector4i dimx1 = env.dimx[3].sll32(16).sra32(16); - GSVector4i dimx2 = env.dimx[5].sll32(16).sra32(16); - GSVector4i dimx3 = env.dimx[7].sll32(16).sra32(16); - - pb->dimx = dimx0.ps32(dimx1).ps16(dimx2.ps32(dimx3)); - } - } - - if(zwrite || ztest) - { - sel.zpsm = RemapPSM(context->ZBUF.PSM); - sel.ztst = ztest ? 
context->TEST.ZTST : ZTST_ALWAYS; - - if(ztest) - { - sel.rzb = 1; - } - else - { - if(zwrite) - { - if(sel.atst != ATST_ALWAYS && (sel.afail == AFAIL_FB_ONLY || sel.afail == AFAIL_RGB_ONLY) - || (sel.zpsm & 3) == 1) // always read-merge-write 24bpp, regardless the mask - { - sel.rzb = 1; - } - } - } - } - - pb->fm = fm; - pb->zm = zm; - - if((sel.fpsm & 3) == 1) - { - pb->fm |= 0xff000000; - } - else if((sel.fpsm & 3) >= 2) - { - uint32 rb = pb->fm & 0x00f800f8; - uint32 ga = pb->fm & 0x8000f800; - - pb->fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000; - } - - if((sel.zpsm & 3) == 1) - { - pb->zm |= 0xff000000; - } - else if((sel.zpsm & 3) >= 2) - { - pb->zm |= 0xffff0000; - } - - pb->fbp = context->FRAME.Block(); - pb->zbp = context->ZBUF.Block(); - pb->bw = context->FRAME.FBW; - - pb->sel = sel; - - return true; -} - -// - -GSRendererCL::TFXJob::TFXJob() - : src_pages(NULL) - , dst_pages(NULL) -{ -} - -GSRendererCL::TFXJob::~TFXJob() -{ - if(src_pages != NULL) _aligned_free(src_pages); - if(dst_pages != NULL) _aligned_free(dst_pages); -} - -GSVector4i* GSRendererCL::TFXJob::GetSrcPages() -{ - if(src_pages == NULL) - { - src_pages = (GSVector4i*)_aligned_malloc(sizeof(GSVector4i) * 4, 16); - - src_pages[0] = GSVector4i::zero(); - src_pages[1] = GSVector4i::zero(); - src_pages[2] = GSVector4i::zero(); - src_pages[3] = GSVector4i::zero(); - } - - return src_pages; -} - -GSVector4i* GSRendererCL::TFXJob::GetDstPages() -{ - if(dst_pages == NULL) - { - dst_pages = (GSVector4i*)_aligned_malloc(sizeof(GSVector4i) * 4, 16); - - dst_pages[0] = GSVector4i::zero(); - dst_pages[1] = GSVector4i::zero(); - dst_pages[2] = GSVector4i::zero(); - dst_pages[3] = GSVector4i::zero(); - } - - return dst_pages; -} - -// - -//#define IOCL_DEBUG - -GSRendererCL::CL::CL() -{ - WIs = INT_MAX; - version = INT_MAX; - - std::string ocldev = theApp.GetConfig("ocldev", ""); - -#ifdef IOCL_DEBUG - ocldev = "Intel(R) Corporation Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz OpenCL C 
1.2 CPU"; -#endif - - list dl; - - GSUtil::GetDeviceDescs(dl); - - for(auto d : dl) - { - if(d.name == ocldev) - { - devs.push_back(d); - - WIs = std::min(WIs, (uint32)d.device.getInfo()); - version = std::min(version, d.version); - - break; // TODO: multiple devices? - } - } - - if(devs.empty() && !dl.empty()) - { - auto d = dl.front(); - - devs.push_back(d); - - WIs = std::min(WIs, (uint32)d.device.getInfo()); - version = std::min(version, d.version); - } - - if(devs.empty()) - { - throw new std::exception("OpenCL device not found"); - } - - vector tmp; - - for(auto d : devs) tmp.push_back(d.device); - - context = cl::Context(tmp); - - queue[0] = cl::CommandQueue(context); - queue[1] = cl::CommandQueue(context); - queue[2] = cl::CommandQueue(context); - - vector buff; - - if(theApp.LoadResource(IDR_TFX_CL, buff)) - { - kernel_str = std::string((const char*)buff.data(), buff.size()); - } - - vb.head = vb.tail = vb.size = 0; - ib.head = ib.tail = ib.size = 0; - pb.head = pb.tail = pb.size = 0; - - vb.mapped_ptr = vb.ptr = NULL; - ib.mapped_ptr = ib.ptr = NULL; - pb.mapped_ptr = pb.ptr = NULL; - - pb.size = TFX_PARAM_SIZE * 256; - pb.buff[0] = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, pb.size); - pb.buff[1] = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, pb.size); - - env = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(gs_env)); - - wqidx = 0; - wq = &queue[0]; -} - -GSRendererCL::CL::~CL() -{ - Unmap(); -} - -void GSRendererCL::CL::Map() -{ - Unmap(); - - cl_map_flags flags = version >= 120 ? 
CL_MAP_WRITE_INVALIDATE_REGION : CL_MAP_WRITE; - - if(vb.head < vb.size) - { - vb.mapped_ptr = wq->enqueueMapBuffer(vb.buff[wqidx], CL_TRUE, flags, vb.head, vb.size - vb.head); - vb.ptr = (unsigned char*)vb.mapped_ptr - vb.head; - ASSERT(((size_t)vb.ptr & 15) == 0); - } - - if(ib.head < ib.size) - { - ib.mapped_ptr = wq->enqueueMapBuffer(ib.buff[wqidx], CL_TRUE, flags, ib.head, ib.size - ib.head); - ib.ptr = (unsigned char*)ib.mapped_ptr - ib.head; - } - - if(pb.head < pb.size) - { - pb.mapped_ptr = wq->enqueueMapBuffer(pb.buff[wqidx], CL_TRUE, flags, pb.head, pb.size - pb.head); - pb.ptr = (unsigned char*)pb.mapped_ptr - pb.head; - ASSERT(((size_t)pb.ptr & 15) == 0); - } -} - -void GSRendererCL::CL::Unmap() -{ - if(vb.mapped_ptr != NULL) wq->enqueueUnmapMemObject(vb.buff[wqidx], vb.mapped_ptr); - if(ib.mapped_ptr != NULL) wq->enqueueUnmapMemObject(ib.buff[wqidx], ib.mapped_ptr); - if(pb.mapped_ptr != NULL) wq->enqueueUnmapMemObject(pb.buff[wqidx], pb.mapped_ptr); - - vb.mapped_ptr = vb.ptr = NULL; - ib.mapped_ptr = ib.ptr = NULL; - pb.mapped_ptr = pb.ptr = NULL; -} - -cl::Kernel GSRendererCL::CL::Build(const char* entry, ostringstream& opt) -{ - cl::Program program; - - if(version >= 120) - { - cl::Program::Binaries binaries; - - try - { - for(auto d : devs) - { - string path = d.tmppath + "/" + entry; - - FILE* f = fopen(path.c_str(), "rb"); - - if(f != NULL) - { - fseek(f, 0, SEEK_END); - long size = ftell(f); - pair b(new char[size], size); - fseek(f, 0, SEEK_SET); - fread(b.first, b.second, 1, f); - fclose(f); - - binaries.push_back(b); - } - else - { - break; - } - } - - if(binaries.size() == devs.size()) - { - vector tmp; - - for(auto d : devs) tmp.push_back(d.device); - - program = cl::Program(context, tmp, binaries); - - AddDefs(opt); - - program.build(opt.str().c_str()); - - cl::Kernel kernel = cl::Kernel(program, entry); - - return kernel; - } - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - } - - for(auto b : binaries) - { 
- delete [] b.first; - } - } - - try - { - printf("building kernel (%s)\n", entry); - - program = cl::Program(context, kernel_str); - - AddDefs(opt); - - program.build(opt.str().c_str()); - } - catch(cl::Error err) - { - if(err.err() == CL_BUILD_PROGRAM_FAILURE) - { - for(auto d : devs) - { - auto s = program.getBuildInfo(d.device); - - printf("kernel (%s) build error: %s\n", entry, s.c_str()); - } - } - - throw err; - } - - if(version >= 120) - { - try - { - vector sizes = program.getInfo(); - vector binaries = program.getInfo(); - - for(int i = 0; i < binaries.size(); i++) - { - string path = devs[i].tmppath + "/" + entry; - - FILE* f = fopen(path.c_str(), "wb"); - - if(f != NULL) - { - fwrite(binaries[i], sizes[i], 1, f); - fclose(f); - } - - delete [] binaries[i]; - } - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - } - } - - return cl::Kernel(program, entry); -} - -void GSRendererCL::CL::AddDefs(ostringstream& opt) -{ - if(version == 110) opt << "-cl-std=CL1.1 "; - else opt << "-cl-std=CL1.2 "; - opt << "-D MAX_FRAME_SIZE=" << MAX_FRAME_SIZE << "u "; - opt << "-D MAX_PRIM_COUNT=" << MAX_PRIM_COUNT << "u "; - opt << "-D MAX_PRIM_PER_BATCH_BITS=" << MAX_PRIM_PER_BATCH_BITS << "u "; - opt << "-D MAX_PRIM_PER_BATCH=" << MAX_PRIM_PER_BATCH << "u "; - opt << "-D MAX_BATCH_COUNT=" << MAX_BATCH_COUNT << "u "; - opt << "-D BIN_SIZE_BITS=" << BIN_SIZE_BITS << " "; - opt << "-D BIN_SIZE=" << BIN_SIZE << "u "; - opt << "-D MAX_BIN_PER_BATCH=" << MAX_BIN_PER_BATCH << "u "; - opt << "-D MAX_BIN_COUNT=" << MAX_BIN_COUNT << "u "; - opt << "-D TFX_PARAM_SIZE=" << TFX_PARAM_SIZE << "u "; -#ifdef IOCL_DEBUG - opt << "-g -s \"E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\tfx.cl\" "; -#endif -} - -cl::Kernel& GSRendererCL::CL::GetPrimKernel(const PrimSelector& sel) -{ - auto i = prim_map.find(sel); - - if(i != prim_map.end()) - { - return i->second; - } - - char entry[256]; - - sprintf(entry, "prim_%02x", sel); - - ostringstream opt; - - opt << "-D 
KERNEL_PRIM=" << entry << " "; - opt << "-D PRIM=" << sel.prim << " "; - - cl::Kernel k = Build(entry, opt); - - prim_map[sel] = k; - - k.setArg(0, env); - - return prim_map[sel]; -} - -cl::Kernel& GSRendererCL::CL::GetTileKernel(const TileSelector& sel) -{ - auto i = tile_map.find(sel); - - if(i != tile_map.end()) - { - return i->second; - } - - char entry[256]; - - sprintf(entry, "tile_%02x", sel); - - ostringstream opt; - - opt << "-D KERNEL_TILE=" << entry << " "; - opt << "-D PRIM=" << sel.prim << " "; - opt << "-D MODE=" << sel.mode << " "; - opt << "-D CLEAR=" << sel.clear << " "; - - cl::Kernel k = Build(entry, opt); - - tile_map[sel] = k; - - k.setArg(0, env); - - return tile_map[sel]; -} - -cl::Kernel& GSRendererCL::CL::GetTFXKernel(const TFXSelector& sel) -{ - auto i = tfx_map.find(sel); - - if(i != tfx_map.end()) - { - return i->second; - } - - char entry[256]; - - sprintf(entry, "tfx_%016llx", sel); - - ostringstream opt; - - opt << "-D KERNEL_TFX=" << entry << " "; - opt << "-D FPSM=" << sel.fpsm << " "; - opt << "-D ZPSM=" << sel.zpsm << " "; - opt << "-D ZTST=" << sel.ztst << " "; - opt << "-D ATST=" << sel.atst << " "; - opt << "-D AFAIL=" << sel.afail << " "; - opt << "-D IIP=" << sel.iip << " "; - opt << "-D TFX=" << sel.tfx << " "; - opt << "-D TCC=" << sel.tcc << " "; - opt << "-D FST=" << sel.fst << " "; - opt << "-D LTF=" << sel.ltf << " "; - opt << "-D TLU=" << sel.tlu << " "; - opt << "-D FGE=" << sel.fge << " "; - opt << "-D DATE=" << sel.date << " "; - opt << "-D ABE=" << sel.abe << " "; - opt << "-D ABA=" << sel.aba << " "; - opt << "-D ABB=" << sel.abb << " "; - opt << "-D ABC=" << sel.abc << " "; - opt << "-D ABD=" << sel.abd << " "; - opt << "-D PABE=" << sel.pabe << " "; - opt << "-D AA1=" << sel.aa1 << " "; - opt << "-D FWRITE=" << sel.fwrite << " "; - opt << "-D FTEST=" << sel.ftest << " "; - opt << "-D RFB=" << sel.rfb << " "; - opt << "-D ZWRITE=" << sel.zwrite << " "; - opt << "-D ZTEST=" << sel.ztest << " "; - opt << "-D RZB=" 
<< sel.rzb << " "; - opt << "-D WMS=" << sel.wms << " "; - opt << "-D WMT=" << sel.wmt << " "; - opt << "-D DATM=" << sel.datm << " "; - opt << "-D COLCLAMP=" << sel.colclamp << " "; - opt << "-D FBA=" << sel.fba << " "; - opt << "-D DTHE=" << sel.dthe << " "; - opt << "-D PRIM=" << sel.prim << " "; - opt << "-D LCM=" << sel.lcm << " "; - opt << "-D MMIN=" << sel.mmin << " "; - opt << "-D NOSCISSOR=" << sel.noscissor << " "; - opt << "-D TPSM=" << sel.tpsm << " "; - opt << "-D AEM=" << sel.aem << " "; - opt << "-D FB=" << sel.fb << " "; - opt << "-D ZB=" << sel.zb << " "; - opt << "-D MERGED=" << sel.merged << " "; - - cl::Kernel k = Build(entry, opt); - - tfx_map[sel] = k; - - k.setArg(0, env); - k.setArg(1, vm); - - return tfx_map[sel]; -} -#endif diff --git a/plugins/GSdx_legacy/GSRendererCL.h b/plugins/GSdx_legacy/GSRendererCL.h deleted file mode 100644 index 81ec47ba49..0000000000 --- a/plugins/GSdx_legacy/GSRendererCL.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" - -#ifdef ENABLE_OPENCL - -__aligned(struct, 32) GSVertexCL -{ - GSVector4 p, t; -}; - -class GSRendererCL : public GSRenderer -{ - typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - - ConvertVertexBufferPtr m_cvb[4][2][2]; - - template - void ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - - union PrimSelector - { - struct - { - uint32 prim:2; // 0 - }; - - uint32 key; - - operator uint32() const { return key; } - }; - - union TileSelector - { - struct - { - uint32 prim:2; // 0 - uint32 mode:2; // 2 - uint32 clear:1; // 4 - }; - - uint32 key; - - operator uint32() const { return key; } - }; - - union TFXSelector - { - struct - { - uint32 fpsm:3; // 0 - uint32 zpsm:3; // 3 - uint32 ztst:2; // 6 (0: off, 1: write, 2: test (ge), 3: test (g)) - uint32 atst:3; // 8 - uint32 afail:2; // 11 - uint32 iip:1; // 13 - uint32 tfx:3; // 14 - uint32 tcc:1; // 17 - uint32 fst:1; // 18 - uint32 ltf:1; // 19 - uint32 tlu:1; // 20 - uint32 fge:1; // 21 - uint32 date:1; // 22 - uint32 abe:1; // 23 - uint32 aba:2; // 24 - uint32 abb:2; // 26 - uint32 abc:2; // 28 - uint32 abd:2; // 30 - - uint32 pabe:1; // 32 - uint32 aa1:1; // 33 - uint32 fwrite:1; // 34 - uint32 ftest:1; // 35 - uint32 rfb:1; // 36 - uint32 zwrite:1; // 37 - uint32 ztest:1; // 38 - uint32 rzb:1; // 39 - uint32 wms:2; // 40 - uint32 wmt:2; // 42 - uint32 datm:1; // 44 - uint32 colclamp:1; // 45 - uint32 fba:1; // 46 - uint32 dthe:1; // 47 - uint32 prim:2; // 48 - uint32 lcm:1; // 50 - uint32 mmin:2; // 51 - uint32 noscissor:1; // 53 - uint32 tpsm:4; // 54 - uint32 aem:1; // 58 - uint32 merged:1; // 59 - // TODO - }; - - struct - { - uint32 _pad1:24; - uint32 ababcd:8; - uint32 _pad2:2; - uint32 fb:2; - uint32 _pad3:1; - uint32 zb:2; - }; - - struct - { - uint32 lo; - uint32 hi; - }; - - uint64 key; - - 
operator uint64() const { return key; } - - bool IsSolidRect() const - { - return prim == GS_SPRITE_CLASS - && iip == 0 - && tfx == TFX_NONE - && abe == 0 - && ztst <= 1 - && atst <= 1 - && date == 0 - && fge == 0; - } - }; - - __aligned(struct, 32) TFXParameter - { - GSVector4i scissor; - GSVector4i dimx; // 4x4 signed char - TFXSelector sel; - uint32 fbp, zbp, bw; - uint32 fm, zm; - uint32 fog; // rgb - uint8 aref, afix; - uint8 ta0, ta1; - uint32 tbp[7], tbw[7]; - int minu, maxu, minv, maxv; // umsk, ufix, vmsk, vfix - int lod; // lcm == 1 - int mxl; - float l; // TEX1.L * -0x10000 - float k; // TEX1.K * 0x10000 - uint32 clut[256]; - }; - - class TFXJob - { - public: - struct { int x, y, z, w; } rect; - TFXSelector sel; - uint32 ib_start; - uint32 prim_count; - GSVector4i* src_pages; // read by any texture level - GSVector4i* dst_pages; // f/z writes to it - uint32 fbp, zbp, bw; - uint32 fpsm, zpsm, tpsm; -#ifdef DEBUG - TFXParameter* pb; -#endif - TFXJob(); - virtual ~TFXJob(); - - GSVector4i* GetSrcPages(); - GSVector4i* GetDstPages(); - }; - - class CL - { - std::string kernel_str; - std::map prim_map; - std::map tile_map; - std::map tfx_map; - - cl::Kernel Build(const char* entry, ostringstream& opt); - void AddDefs(ostringstream& opt); - - public: - std::vector devs; - cl::Context context; - cl::CommandQueue queue[3]; - cl::Buffer vm; - cl::Buffer tex; - struct { cl::Buffer buff[2]; size_t head, tail, size; unsigned char* ptr; void* mapped_ptr; } vb, ib, pb; - cl::Buffer env; - cl::CommandQueue* wq; - int wqidx; - uint32 WIs; - int version; - - public: - CL(); - virtual ~CL(); - - cl::Kernel& GetPrimKernel(const PrimSelector& sel); - cl::Kernel& GetTileKernel(const TileSelector& sel); - cl::Kernel& GetTFXKernel(const TFXSelector& sel); - - void Map(); - void Unmap(); - }; - - CL m_cl; - std::list> m_jobs; - uint32 m_vb_start; - uint32 m_vb_count; - uint32 m_pb_start; - uint32 m_pb_count; - bool m_synced; - - void Enqueue(); - void EnqueueTFX(std::list>& 
jobs, uint32 bin_count, const cl_uchar4& bin_dim); - void JoinTFX(std::list>& jobs); - bool UpdateTextureCache(TFXJob* job); - void InvalidateTextureCache(TFXJob* job); - void UsePages(uint32* pages); - void ReleasePages(uint32* pages); - - static void CL_CALLBACK ReleasePageEvent(cl_event event, cl_int event_command_exec_status, void* user_data); - -protected: - GSTexture* m_texture[2]; - uint8* m_output; - - GSVector4i m_rw_pages[2][4]; // pages that may be read or modified by the rendering queue, f/z rw, tex r - GSVector4i m_tc_pages[4]; // invalidated texture cache pages (split this into 8:24?) // TODO: this should be block level, too many overlaps inside pages with render targets - GSVector4i m_tmp_pages[4]; - uint32 m_tmp_pages2[MAX_PAGES + 1]; - uint32 m_rw_pages_rendering[512]; // pages that are currently in-use - - void Reset(); - void VSync(int field); - void ResetDevice(); - GSTexture* GetOutput(int i); - - void Draw(); - void Sync(int reason); - void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - - bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count); - -public: - GSRendererCL(); - virtual ~GSRendererCL(); -}; - -#endif diff --git a/plugins/GSdx_legacy/GSRendererCS.cpp b/plugins/GSdx_legacy/GSRendererCS.cpp deleted file mode 100644 index 86b1e23b07..0000000000 --- a/plugins/GSdx_legacy/GSRendererCS.cpp +++ /dev/null @@ -1,877 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererCS.h" - -#define PS_BATCH_SIZE 512 - -GSRendererCS::GSRendererCS() - : GSRenderer() -{ - m_nativeres = true; - - memset(m_vm_valid, 0, sizeof(m_vm_valid)); - - memset(m_texture, 0, sizeof(m_texture)); - - m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); -} - -GSRendererCS::~GSRendererCS() -{ - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - } - - _aligned_free(m_output); -} - -bool GSRendererCS::CreateDevice(GSDevice* dev_unk) -{ - if(!__super::CreateDevice(dev_unk)) - return false; - - HRESULT hr; - - D3D11_DEPTH_STENCIL_DESC dsd; - D3D11_BLEND_DESC bsd; - D3D11_SAMPLER_DESC sd; - D3D11_BUFFER_DESC bd; - D3D11_TEXTURE2D_DESC td; - D3D11_UNORDERED_ACCESS_VIEW_DESC uavd; - D3D11_SHADER_RESOURCE_VIEW_DESC srvd; - - D3D_FEATURE_LEVEL level; - - ((GSDeviceDX*)dev_unk)->GetFeatureLevel(level); - - if(level < D3D_FEATURE_LEVEL_11_0) - return false; - - GSDevice11* dev = (GSDevice11*)dev_unk; - - ID3D11DeviceContext* ctx = *dev; - - // empty depth stencil state - - memset(&dsd, 0, sizeof(dsd)); - - dsd.StencilEnable = false; - dsd.DepthEnable = false; - - hr = (*dev)->CreateDepthStencilState(&dsd, &m_dss); - - if(FAILED(hr)) return false; - - // empty blend state - - memset(&bsd, 0, sizeof(bsd)); - - bsd.RenderTarget[0].BlendEnable = false; - - hr = (*dev)->CreateBlendState(&bsd, &m_bs); - - if(FAILED(hr)) return false; - - // point sampler - - 
memset(&sd, 0, sizeof(sd)); - - sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.MinLOD = -FLT_MAX; - sd.MaxLOD = FLT_MAX; - sd.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - sd.ComparisonFunc = D3D11_COMPARISON_NEVER; - - hr = (*dev)->CreateSamplerState(&sd, &m_ss); - - if(FAILED(hr)) return false; - - // link buffer - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = 256 << 20; // 256 MB w00t - bd.StructureByteStride = sizeof(uint32) * 4; // c, z, id, next - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - - hr = (*dev)->CreateBuffer(&bd, NULL, &m_lb); - - { - uint32 data[] = {0, 0, 0xffffffff, 0}; - - D3D11_BOX box; - memset(&box, 0, sizeof(box)); - box.right = sizeof(data); - box.bottom = 1; - box.back = 1; - - ctx->UpdateSubresource(m_lb, 0, &box, data, 0, 0); - } - - if(FAILED(hr)) return false; - - memset(&uavd, 0, sizeof(uavd)); - - uavd.Format = DXGI_FORMAT_UNKNOWN; - uavd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride; - uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER; - uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; - - hr = (*dev)->CreateUnorderedAccessView(m_lb, &uavd, &m_lb_uav); - - if(FAILED(hr)) return false; - - memset(&srvd, 0, sizeof(srvd)); - - srvd.Format = DXGI_FORMAT_UNKNOWN; - srvd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride; - srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - - hr = (*dev)->CreateShaderResourceView(m_lb, &srvd, &m_lb_srv); - - if(FAILED(hr)) return false; - - // start offset buffer - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(uint32) * 2048 * 2048; // index - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; - bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; 
- - hr = (*dev)->CreateBuffer(&bd, NULL, &m_sob); - - if(FAILED(hr)) return false; - - memset(&uavd, 0, sizeof(uavd)); - - uavd.Format = DXGI_FORMAT_R32_TYPELESS; - uavd.Buffer.NumElements = bd.ByteWidth / sizeof(uint32); - uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; - uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; - - hr = (*dev)->CreateUnorderedAccessView(m_sob, &uavd, &m_sob_uav); - - if(FAILED(hr)) return false; - - memset(&srvd, 0, sizeof(srvd)); - - srvd.Format = DXGI_FORMAT_R32_TYPELESS; - srvd.BufferEx.NumElements = bd.ByteWidth / sizeof(uint32); - srvd.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; - srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; - - hr = (*dev)->CreateShaderResourceView(m_sob, &srvd, &m_sob_srv); - - if(FAILED(hr)) return false; - - const uint32 tmp = 0; - - ctx->ClearUnorderedAccessViewUint(m_sob_uav, &tmp); // initial clear, next time Draw should restore it in Step 2 - - // video memory (4MB) - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = 4 * 1024 * 1024; - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS; - bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; - - hr = (*dev)->CreateBuffer(&bd, NULL, &m_vm); - - if(FAILED(hr)) return false; - - memset(&uavd, 0, sizeof(uavd)); - - uavd.Format = DXGI_FORMAT_R32_TYPELESS; - uavd.Buffer.FirstElement = 0; - uavd.Buffer.NumElements = 1024 * 1024; - uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; - uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; - - hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav); - - if(FAILED(hr)) return false; -/* - memset(&td, 0, sizeof(td)); - - td.Width = PAGE_SIZE; - td.Height = MAX_PAGES; - td.Format = DXGI_FORMAT_R8_UINT; - td.MipLevels = 1; - td.ArraySize = 1; - td.SampleDesc.Count = 1; - td.SampleDesc.Quality = 0; - td.Usage = D3D11_USAGE_DEFAULT; - td.BindFlags = D3D11_BIND_UNORDERED_ACCESS; - - hr = (*dev)->CreateTexture2D(&td, NULL, &m_vm); - - if(FAILED(hr)) return false; - - memset(&uavd, 0, 
sizeof(uavd)); - - uavd.Format = DXGI_FORMAT_R8_UINT; - uavd.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; - - hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav); - - if(FAILED(hr)) return false; -*/ - // one page, for copying between cpu<->gpu - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = PAGE_SIZE; - bd.Usage = D3D11_USAGE_STAGING; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - - hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb); - - if(FAILED(hr)) return false; -/* - memset(&td, 0, sizeof(td)); - - td.Width = PAGE_SIZE; - td.Height = 1; - td.Format = DXGI_FORMAT_R8_UINT; - td.MipLevels = 1; - td.ArraySize = 1; - td.SampleDesc.Count = 1; - td.SampleDesc.Quality = 0; - td.Usage = D3D11_USAGE_STAGING; - td.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - - hr = (*dev)->CreateTexture2D(&td, NULL, &m_pb); - - if(FAILED(hr)) return false; -*/ - // VSConstantBuffer - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(VSConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = (*dev)->CreateBuffer(&bd, NULL, &m_vs_cb); - - if(FAILED(hr)) return false; - - // PS - - D3D_SHADER_MACRO macro[] = - { - {NULL, NULL}, - }; - - try - { - vector shader; - theApp.LoadResource(IDR_CS_FX, shader); - dev->CompileShader((const char *)shader.data(), shader.size(), "cs.fx", nullptr, "ps_main0", macro, &m_ps0); - } - catch (GSDXRecoverableError) - { - return false; - } - - // PSConstantBuffer - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(PSConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = (*dev)->CreateBuffer(&bd, NULL, &m_ps_cb); - - if(FAILED(hr)) return false; - - // - - return true; -} - -void GSRendererCS::ResetDevice() -{ - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - - m_texture[i] = NULL; - } -} - -void GSRendererCS::VSync(int field) -{ - __super::VSync(field); - - 
//printf("%lld\n", m_perfmon.GetFrame()); -} - -GSTexture* GSRendererCS::GetOutput(int i) -{ - // TODO: create a compute shader which unswizzles the frame from m_vm to the output texture - - const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; - - int w = DISPFB.FBW * 64; - int h = GetFrameRect(i).bottom; - - // TODO: round up bottom - - if(m_dev->ResizeTexture(&m_texture[i], w, h)) - { - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM]; - - GSVector4i r(0, 0, w, h); - GSVector4i r2 = r.ralign(psm.bs); - - GSOffset* off = m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM); - - Read(off, r2, false); - - (m_mem.*psm.rtx)(off, r2, m_output, 1024 * 4, m_env.TEXA); - - m_texture[i]->Update(r, m_output, 1024 * 4); - - if(s_dump) - { - if(s_save && s_n >= s_saven) - { - m_texture[i]->Save(format("c:\\temp1\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM)); - } - - s_n++; - } - } - - return m_texture[i]; -} - -void GSRendererCS::Draw() -{ - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - GSVector2i rtsize(2048, 2048); - GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); - GSVector4i r = bbox.rintersect(scissor); - - uint32 fm = context->FRAME.FBMSK; - uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 
0xffffffff : 0; - - if(fm != 0xffffffff) - { - Write(context->offset.fb, r); - - // TODO: m_tc->InvalidateVideoMem(context->offset.fb, r, false); - } - - if(zm != 0xffffffff) - { - Write(context->offset.zb, r); - - // TODO: m_tc->InvalidateVideoMem(context->offset.zb, r, false); - } - - // TODO: if(24-bit) fm/zm |= 0xff000000; - - if(PRIM->TME) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - - GSVector4i r; - - GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); - - // TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu - - // TODO: Write transfer should directly write to m_vm, then Read/Write syncing won't be necessary, clut must be updated with the gpu also - - // TODO: tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); - - // if(!tex) return; - } - - // - - GSDevice11* dev = (GSDevice11*)m_dev; - - ID3D11DeviceContext* ctx = *dev; - - // - - dev->BeginScene(); - - // SetupOM - - dev->OMSetDepthStencilState(m_dss, 0); - dev->OMSetBlendState(m_bs, 0); - - ID3D11UnorderedAccessView* uavs[] = {m_vm_uav, m_lb_uav, m_sob_uav}; - uint32 counters[] = {1, 0, 0}; - - dev->OMSetRenderTargets(rtsize, countof(uavs), uavs, counters, &scissor); - - // SetupIA - - D3D11_PRIMITIVE_TOPOLOGY topology; - - switch(m_vt.m_primclass) - { - case GS_POINT_CLASS: - topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; - break; - case GS_TRIANGLE_CLASS: - topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - break; - default: - __assume(0); - } - - GSVector4i r2 = bbox.add32(GSVector4i(-1, -1, 1, 1)).rintersect(scissor); - - m_vertex.buff[m_vertex.next + 0].XYZ.X = (uint16)(context->XYOFFSET.OFX + (r2.left << 4)); - m_vertex.buff[m_vertex.next + 0].XYZ.Y = (uint16)(context->XYOFFSET.OFY + (r2.top << 4)); - m_vertex.buff[m_vertex.next + 1].XYZ.X = (uint16)(context->XYOFFSET.OFX + (r2.right << 4)); - m_vertex.buff[m_vertex.next + 
1].XYZ.Y = (uint16)(context->XYOFFSET.OFY + (r2.bottom << 4)); - - m_index.buff[m_index.tail + 0] = m_vertex.next + 0; - m_index.buff[m_index.tail + 1] = m_vertex.next + 1; - - dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next + 2); - dev->IASetIndexBuffer(m_index.buff, m_index.tail + 2); - - // SetupVS - - VSSelector vs_sel; - - vs_sel.tme = PRIM->TME; - vs_sel.fst = PRIM->FST; - - VSConstantBuffer vs_cb; - - float sx = 2.0f / (rtsize.x << 4); - float sy = 2.0f / (rtsize.y << 4); - //float sx = 1.0f / 16; - //float sy = 1.0f / 16; - float ox = (float)(int)context->XYOFFSET.OFX; - float oy = (float)(int)context->XYOFFSET.OFY; - - vs_cb.VertexScale = GSVector4(sx, -sy, 0.0f, 0.0f); - vs_cb.VertexOffset = GSVector4(ox * sx + 1, -(oy * sy + 1), 0.0f, -1.0f); - //vs_cb.VertexScale = GSVector4(sx, sy, 0.0f, 0.0f); - //vs_cb.VertexOffset = GSVector4(ox * sx, oy * sy, 0.0f, -1.0f); - - { - GSVertexShader11 vs; - - hash_map::const_iterator i = m_vs.find(vs_sel); - - if(i != m_vs.end()) - { - vs = i->second; - } - else - { - string str[2]; - - str[0] = format("%d", vs_sel.tme); - str[1] = format("%d", vs_sel.fst); - - D3D_SHADER_MACRO macro[] = - { - {"VS_TME", str[0].c_str()}, - {"VS_FST", str[1].c_str()}, - {NULL, NULL}, - }; - - D3D11_INPUT_ELEMENT_DESC layout[] = - { - {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - - vector shader; - theApp.LoadResource(IDR_CS_FX, shader); - dev->CompileShader((const char *)shader.data(), 
shader.size(), "cs.fx", nullptr, "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il); - - m_vs[vs_sel] = vs; - } - - ctx->UpdateSubresource(m_vs_cb, 0, NULL, &vs_cb, 0, 0); // TODO: only update if changed - - dev->VSSetShader(vs.vs, m_vs_cb); - - dev->IASetInputLayout(vs.il); - } - - // SetupGS - - GSSelector gs_sel; - - gs_sel.iip = PRIM->IIP; - - CComPtr gs[2]; - - for(int j = 0; j < 2; j++) - { - gs_sel.prim = j == 0 ? m_vt.m_primclass : GS_SPRITE_CLASS; - - hash_map >::const_iterator i = m_gs.find(gs_sel); - - if(i != m_gs.end()) - { - gs[j] = i->second; - } - else - { - string str[2]; - - str[0] = format("%d", gs_sel.iip); - str[1] = format("%d", j == 0 ? gs_sel.prim : GS_SPRITE_CLASS); - - D3D_SHADER_MACRO macro[] = - { - {"GS_IIP", str[0].c_str()}, - {"GS_PRIM", str[1].c_str()}, - {NULL, NULL}, - }; - - vector shader; - theApp.LoadResource(IDR_CS_FX, shader); - dev->CompileShader((const char *)shader.data(), shader.size(), "cs.fx", nullptr, "gs_main", macro, &gs[j]); - - m_gs[gs_sel] = gs[j]; - } - } - - // SetupPS - - dev->PSSetSamplerState(m_ss, NULL, NULL); - - PSSelector ps_sel; - - ps_sel.fpsm = context->FRAME.PSM; - ps_sel.zpsm = context->ZBUF.PSM; - - CComPtr ps[2] = {m_ps0, NULL}; - - hash_map >::const_iterator i = m_ps1.find(ps_sel); - - if(i != m_ps1.end()) - { - ps[1] = i->second; - } - else - { - string str[15]; - - str[0] = format("%d", PS_BATCH_SIZE); - str[1] = format("%d", context->FRAME.PSM); - str[2] = format("%d", context->ZBUF.PSM); - - D3D_SHADER_MACRO macro[] = - { - {"PS_BATCH_SIZE", str[0].c_str()}, - {"PS_FPSM", str[1].c_str()}, - {"PS_ZPSM", str[2].c_str()}, - {NULL, NULL}, - }; - - vector shader; - theApp.LoadResource(IDR_CS_FX, shader); - dev->CompileShader((const char *)shader.data(), shader.size(), "cs.fx", nullptr, "ps_main1", macro, &ps[1]); - - m_ps1[ps_sel] = ps[1]; - } - - PSConstantBuffer ps_cb; - - ps_cb.fm = fm; - ps_cb.zm = zm; - - ctx->UpdateSubresource(m_ps_cb, 0, NULL, &ps_cb, 0, 0); // TODO: only update if 
changed - - OffsetBuffer* fzbo = NULL; - - GetOffsetBuffer(&fzbo); - - dev->PSSetShaderResourceView(0, fzbo->row_srv); - dev->PSSetShaderResourceView(1, fzbo->col_srv); - // TODO: palette, texture - - int step = PS_BATCH_SIZE * GSUtil::GetVertexCount(PRIM->PRIM); - - for(uint32 i = 0; i < m_index.tail; i += step) - { - dev->IASetPrimitiveTopology(topology); - dev->GSSetShader(gs[0]); - dev->PSSetShader(ps[0], m_ps_cb); - dev->DrawIndexedPrimitive(i, std::min(m_index.tail - i, step)); - - dev->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST); - dev->GSSetShader(gs[1]); - dev->PSSetShader(ps[1], m_ps_cb); - dev->DrawIndexedPrimitive(m_index.tail, 2); - - //printf("%d/%d, %d %d %d %d\n", i, m_index.tail, r2.x, r2.y, r2.z, r2.w); - } - - dev->EndScene(); - - if(0) - { - std::string s; - /* - s = format("c:\\temp1\\_%05d_f%lld_fb0_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0); - m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024); - Read(m_mem.GetOffset(0, 16, PSM_PSMCT32), GSVector4i(0, 0, 1024, 1024), false); - */ - // - if(fm != 0xffffffff) Read(context->offset.fb, r, false); - // - if(zm != 0xffffffff) Read(context->offset.zb, r, false); - - s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); - m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - - s = format("c:\\temp1\\_%05d_f%lld_zt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); - m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - - /* - s = format("c:\\temp1\\_%05d_f%lld_fb1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0); - m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024); - */ - - s_n++; - } -} - -void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) -{ - GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, 
BITBLTBUF.DPSM); - - Read(off, r, true); // TODO: fully overwritten pages are not needed to be read, only invalidated (important) - - // TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target - // TODO: invalidate texture cache -} - -void GSRendererCS::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) -{ - GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); - - Read(off, r, false); -} - -void GSRendererCS::Write(GSOffset* off, const GSVector4i& r) -{ - GSDevice11* dev = (GSDevice11*)m_dev; - - ID3D11DeviceContext* ctx = *dev; - - D3D11_BOX box; - - memset(&box, 0, sizeof(box)); - - box.right = 1; - box.bottom = 1; - box.back = 1; - - uint32* pages = off->GetPages(r); - - for(size_t i = 0; pages[i] != GSOffset::EOP; i++) - { - uint32 page = pages[i]; - - uint32 row = page >> 5; - uint32 col = 1 << (page & 31); - - if((m_vm_valid[row] & col) == 0) - { - m_vm_valid[row] |= col; - - box.left = page * PAGE_SIZE; - box.right = (page + 1) * PAGE_SIZE; - - ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0); -/* - // m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4 - - box.left = 0; - box.right = PAGE_SIZE; - box.top = page; - box.bottom = box.top + 1; - - ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0); -*/ - if(0) - printf("[%lld] write %05x %d %d (%d)\n", __rdtsc(), off->bp, off->bw, off->psm, page); - } - } - - delete [] pages; -} - -void GSRendererCS::Read(GSOffset* off, const GSVector4i& r, bool invalidate) -{ - GSDevice11* dev = (GSDevice11*)m_dev; - - ID3D11DeviceContext* ctx = *dev; - - D3D11_BOX box; - - memset(&box, 0, sizeof(box)); - - box.right = 1; - box.bottom = 1; - box.back = 1; - - uint32* pages = off->GetPages(r); - - for(size_t i = 0; pages[i] != GSOffset::EOP; i++) - { - uint32 page = pages[i]; - - uint32 row = page >> 5; - uint32 col = 1 << (page & 31); - - 
if(m_vm_valid[row] & col) - { - if(invalidate) - { - m_vm_valid[row] ^= col; - } - - box.left = page * PAGE_SIZE; - box.right = (page + 1) * PAGE_SIZE; - - ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box); -/* - // m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4 - - box.left = 0; - box.right = PAGE_SIZE; - box.top = page; - box.bottom = box.top + 1; - - ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box); -*/ - D3D11_MAPPED_SUBRESOURCE map; - - if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ, 0, &map))) - { - memcpy(m_mem.m_vm8 + page * PAGE_SIZE, map.pData, PAGE_SIZE); - - ctx->Unmap(m_pb, 0); - - if(0) - printf("[%lld] read %05x %d %d (%d)\n", __rdtsc(), off->bp, off->bw, off->psm, page); - } - } - } - - delete [] pages; -} - -bool GSRendererCS::GetOffsetBuffer(OffsetBuffer** fzbo) -{ - HRESULT hr; - - GSDevice11* dev = (GSDevice11*)m_dev; - - D3D11_BUFFER_DESC bd; - D3D11_SHADER_RESOURCE_VIEW_DESC srvd; - D3D11_SUBRESOURCE_DATA data; - - hash_map::iterator i = m_offset.find(m_context->offset.fzb->hash); - - if(i == m_offset.end()) - { - OffsetBuffer ob; - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(GSVector2i) * 2048; - bd.Usage = D3D11_USAGE_IMMUTABLE; - bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; - - memset(&data, 0, sizeof(data)); - - data.pSysMem = m_context->offset.fzb->row; - - hr = (*dev)->CreateBuffer(&bd, &data, &ob.row); - - if(FAILED(hr)) return false; - - data.pSysMem = m_context->offset.fzb->col; - - hr = (*dev)->CreateBuffer(&bd, &data, &ob.col); - - if(FAILED(hr)) return false; - - memset(&srvd, 0, sizeof(srvd)); - - srvd.Format = DXGI_FORMAT_R32G32_SINT; - srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - srvd.Buffer.FirstElement = 0; - srvd.Buffer.NumElements = 2048; - - hr = (*dev)->CreateShaderResourceView(ob.row, &srvd, &ob.row_srv); - - if(FAILED(hr)) return false; - - hr = (*dev)->CreateShaderResourceView(ob.col, &srvd, &ob.col_srv); - - if(FAILED(hr)) 
return false; - - m_offset[m_context->offset.fzb->hash] = ob; - - i = m_offset.find(m_context->offset.fzb->hash); - } - - *fzbo = &i->second; - - return true; -} diff --git a/plugins/GSdx_legacy/GSRendererCS.h b/plugins/GSdx_legacy/GSRendererCS.h deleted file mode 100644 index 185356c658..0000000000 --- a/plugins/GSdx_legacy/GSRendererCS.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" -#include "GSDevice11.h" - -class GSRendererCS : public GSRenderer -{ - struct VSSelector - { - union - { - struct - { - uint32 tme:1; - uint32 fst:1; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x3;} - - VSSelector() : key(0) {} - }; - - __aligned(struct, 32) VSConstantBuffer - { - GSVector4 VertexScale; - GSVector4 VertexOffset; - }; - - struct GSSelector - { - union - { - struct - { - uint32 iip:1; - uint32 prim:2; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x7;} - - GSSelector() : key(0) {} - }; - - struct PSSelector - { - union - { - struct - { - uint32 fpsm:6; - uint32 zpsm:6; - }; - - uint32 key; - }; - - operator uint32() {return key & 0x3ff;} - - PSSelector() : key(0) {} - }; - - __aligned(struct, 32) PSConstantBuffer - { - uint32 fm; - uint32 zm; - }; - - CComPtr m_dss; - CComPtr m_bs; - CComPtr m_ss; - CComPtr m_lb; - CComPtr m_lb_uav; - CComPtr m_lb_srv; - CComPtr m_sob; - CComPtr m_sob_uav; - CComPtr m_sob_srv; - CComPtr m_vm; - //CComPtr m_vm; - CComPtr m_vm_uav; - uint32 m_vm_valid[16]; - CComPtr m_pb; - //CComPtr m_pb; - hash_map m_vs; - CComPtr m_vs_cb; - hash_map > m_gs; - CComPtr m_ps0; - hash_map > m_ps1; - CComPtr m_ps_cb; - - void Write(GSOffset* off, const GSVector4i& r); - void Read(GSOffset* off, const GSVector4i& r, bool invalidate); - - struct OffsetBuffer - { - CComPtr row, col; - CComPtr row_srv, col_srv; - }; - - hash_map m_offset; - - bool GetOffsetBuffer(OffsetBuffer** fzbo); - -protected: - GSTexture* m_texture[2]; - uint8* m_output; - - bool CreateDevice(GSDevice* dev); - void ResetDevice(); - void VSync(int field); - GSTexture* GetOutput(int i); - void Draw(); - void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut); - -public: - GSRendererCS(); - virtual ~GSRendererCS(); -}; diff --git 
a/plugins/GSdx_legacy/GSRendererDX.cpp b/plugins/GSdx_legacy/GSRendererDX.cpp deleted file mode 100644 index 531d3e4ecd..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX.cpp +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererDX.h" -#include "GSDeviceDX.h" - -GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter) - : GSRendererHW(tc) - , m_pixelcenter(pixelcenter) -{ - m_logz = !!theApp.GetConfig("logz", 0); - m_fba = !!theApp.GetConfig("fba", 1); - - UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0) && !!theApp.GetConfig("UserHacks", 0); - UserHacks_AlphaStencil = !!theApp.GetConfig("UserHacks_AlphaStencil", 0) && !!theApp.GetConfig("UserHacks", 0); - - UserHacks_TCOffset = !!theApp.GetConfig("UserHacks", 0) ? 
theApp.GetConfig("UserHacks_TCOffset", 0) : 0; - UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; - UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f; -} - -GSRendererDX::~GSRendererDX() -{ -} - -void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) -{ - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize(); - const GSVector2& rtscale = ds ? ds->GetScale() : rt->GetScale(); - - bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - - GSTexture* rtcopy = NULL; - - ASSERT(m_dev != NULL); - - GSDeviceDX* dev = (GSDeviceDX*)m_dev; - - if(DATE) - { - if(dev->HasStencil()) - { - GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); - GSVector4 off = GSVector4(-1.0f, 1.0f); - - GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + off.xxyy()) * s.xyxy()).sat(off.zzyy()); - GSVector4 dst = src * 2.0f + off.xxxx(); - - GSVertexPT1 vertices[] = - { - {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, - {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, - {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, - {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, - }; - - dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); - } - else - { - rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); - - // I'll use VertexTrace when I consider it more trustworthy - - dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); - } - } - - // - - dev->BeginScene(); - - // om - - GSDeviceDX::OMDepthStencilSelector om_dssel; - - if(context->TEST.ZTE) - { - om_dssel.ztst = context->TEST.ZTST; - om_dssel.zwe = !context->ZBUF.ZMSK; - } - else - { - om_dssel.ztst = ZTST_ALWAYS; - } - - if(m_fba) - { - om_dssel.fba = context->FBA.FBA; - } - - GSDeviceDX::OMBlendSelector om_bsel; - - if(!IsOpaque()) - { - om_bsel.abe = PRIM->ABE || PRIM->AA1 && 
m_vt.m_primclass == GS_LINE_CLASS; - - om_bsel.a = context->ALPHA.A; - om_bsel.b = context->ALPHA.B; - om_bsel.c = context->ALPHA.C; - om_bsel.d = context->ALPHA.D; - - if(env.PABE.PABE) - { - if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) - { - // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader - // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result - - om_bsel.abe = 0; - } - else - { - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. - //ASSERT(0); - } - } - } - - om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); - - // vs - - GSDeviceDX::VSSelector vs_sel; - - vs_sel.tme = PRIM->TME; - vs_sel.fst = PRIM->FST; - vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; - vs_sel.rtcopy = !!rtcopy; - - // The real GS appears to do no masking based on the Z buffer format and writing larger Z values - // than the buffer supports seems to be an error condition on the real GS, causing it to crash. - // We are probably receiving bad coordinates from VU1 in these cases. - - if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) - { - if(context->ZBUF.PSM == PSM_PSMZ24) - { - if(m_vt.m_max.p.z > 0xffffff) - { - ASSERT(m_vt.m_min.p.z > 0xffffff); - // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. - if (m_vt.m_min.p.z > 0xffffff) - { - vs_sel.bppz = 1; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) - { - if(m_vt.m_max.p.z > 0xffff) - { - ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo - // Fixme : Same as above, I guess. 
- if (m_vt.m_min.p.z > 0xffff) - { - vs_sel.bppz = 2; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - } - - GSDeviceDX::VSConstantBuffer vs_cb; - - float sx = 2.0f * rtscale.x / (rtsize.x << 4); - float sy = 2.0f * rtscale.y / (rtsize.y << 4); - float ox = (float)(int)context->XYOFFSET.OFX; - float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; - float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; - - //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, - //because DX10 and DX9 have a different pixel center.) - // - //The resulting shifted output aligns better with common blending / corona / blurring effects, - //but introduces a few bad pixels on the edges. - - if(rt && rt->LikelyOffset) - { - // DX9 has pixelcenter set to 0.0, so give it some value here - - if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } - - ox2 *= rt->OffsetHack_modx; - oy2 *= rt->OffsetHack_mody; - } - - vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); - vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); - - // gs - - GSDeviceDX::GSSelector gs_sel; - - gs_sel.iip = PRIM->IIP; - gs_sel.prim = m_vt.m_primclass; - - // ps - - GSDeviceDX::PSSelector ps_sel; - GSDeviceDX::PSSamplerSelector ps_ssel; - GSDeviceDX::PSConstantBuffer ps_cb; - - // Gregory: code is not yet ready so let's only enable it when - // CRC is below the FULL level - if (m_texture_shuffle && (m_crc_hack_level < 3)) { - ps_sel.shuffle = 1; - ps_sel.fmt = 0; - - const GIFRegXYOFFSET& o = m_context->XYOFFSET; - GSVertex* v = &m_vertex.buff[0]; - size_t count = m_vertex.next; - - // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors - int pos = (v[0].XYZ.X - o.OFX) & 0xFF; - bool write_ba = (pos > 112 && pos < 136); - // Read texture is 8 to 16 pixels (same as above) - int tex_pos = v[0].U & 0xFF; - ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); - - 
GL_INS("Color shuffle %s => %s", ps_sel.read_ba ? "BA" : "RG", write_ba ? "BA" : "RG"); - - // Convert the vertex info to a 32 bits color format equivalent - for (size_t i = 0; i < count; i += 2) { - if (write_ba) - v[i].XYZ.X -= 128u; - else - v[i + 1].XYZ.X += 128u; - - if (ps_sel.read_ba) - v[i].U -= 128u; - else - v[i + 1].U += 128u; - - // Height is too big (2x). - int tex_offset = v[i].V & 0xF; - GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); - - GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; - - v[i].XYZ.Y = tmp.x; - v[i].V = tmp.y; - v[i + 1].XYZ.Y = tmp.z; - v[i + 1].V = tmp.w; - } - - // Please bang my head against the wall! - // 1/ Reduce the frame mask to a 16 bit format - const uint32& m = context->FRAME.FBMSK; - uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000); - om_bsel.wrgba = 0; - - // 2 Select the new mask (Please someone put SSE here) - if ((fbmask & 0xFF) == 0) { - if (write_ba) - om_bsel.wb = 1; - else - om_bsel.wr = 1; - } - else if ((fbmask & 0xFF) != 0xFF) { -#ifdef _DEBUG - fprintf(stderr, "Please fix me! wb %d wr %d\n", om_bsel.wb, om_bsel.wr); -#endif - //ASSERT(0); - } - - fbmask >>= 8; - if ((fbmask & 0xFF) == 0) { - if (write_ba) - om_bsel.wa = 1; - else - om_bsel.wg = 1; - } - else if ((fbmask & 0xFF) != 0xFF) { -#ifdef _DEBUG - fprintf(stderr, "Please fix me! 
wa %d wg %d\n", om_bsel.wa, om_bsel.wg); -#endif - //ASSERT(0); - } - - } - else { - //ps_sel.fmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; - - om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); - } - - if(DATE) - { - if(dev->HasStencil()) - { - om_dssel.date = 1; - } - else - { - ps_sel.date = 1 + context->TEST.DATM; - } - } - - if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - ps_sel.colclip = 1; - } - - ps_sel.clr1 = om_bsel.IsCLR1(); - ps_sel.fba = context->FBA.FBA; - ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; - ps_sel.aout &= !ps_sel.shuffle; - if(UserHacks_AlphaHack) ps_sel.aout = 1; - - if(PRIM->FGE) - { - ps_sel.fog = 1; - - ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; - } - - if(context->TEST.ATE) - ps_sel.atst = context->TEST.ATST; - else - ps_sel.atst = ATST_ALWAYS; - - if (context->TEST.ATE && context->TEST.ATST > 1) - ps_cb.FogColor_AREF.a = (float)context->TEST.AREF; - - // Destination alpha pseudo stencil hack: use a stencil operation combined with an alpha test - // to only draw pixels which would cause the destination alpha test to fail in the future once. - // Unfortunately this also means only drawing those pixels at all, which is why this is a hack. - // The interaction with FBA in D3D9 is probably less than ideal. - if (UserHacks_AlphaStencil && DATE && dev->HasStencil() && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == 1)) - { - if (!context->FBA.FBA) - { - if (context->TEST.DATM == 0) - ps_sel.atst = 5; // >= - else - ps_sel.atst = 2; // < - ps_cb.FogColor_AREF.a = (float)0x80; - } - if (!(context->FBA.FBA && context->TEST.DATM == 1)) - om_dssel.alpha_stencil = 1; - } - - if(tex) - { - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? 
GSLocalMemory::m_psm[context->TEX0.CPSM] : psm; - bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0; - bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3; - // Don't force extra filtering on sprite (it creates various upscaling issue) - bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); - - ps_sel.wms = context->CLAMP.WMS; - ps_sel.wmt = context->CLAMP.WMT; - if (ps_sel.shuffle) { - ps_sel.fmt = 0; - } else { - ps_sel.fmt = tex->m_palette ? cpsm.fmt | 4 : cpsm.fmt; - } - ps_sel.aem = env.TEXA.AEM; - ps_sel.tfx = context->TEX0.TFX; - ps_sel.tcc = context->TEX0.TCC; - ps_sel.ltf = bilinear && !simple_sample; - ps_sel.rt = tex->m_target; - ps_sel.spritehack = tex->m_spritehack_t; - ps_sel.point_sampler = !(bilinear && simple_sample); - - int w = tex->m_texture->GetWidth(); - int h = tex->m_texture->GetHeight(); - - int tw = (int)(1 << context->TEX0.TW); - int th = (int)(1 << context->TEX0.TH); - - GSVector4 WH(tw, th, w, h); - - if(PRIM->FST) - { - vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); - //Maybe better? 
- //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); - ps_sel.fst = 1; - } - - ps_cb.WH = WH; - ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); - - // TC Offset Hack - ps_sel.tcoffsethack = !!UserHacks_TCOffset; - ps_cb.TC_OffsetHack = GSVector4(UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy(); - - GSVector4 clamp(ps_cb.MskFix); - GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); - - ps_cb.MinMax = clamp / WH.xyxy(); - ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); - - ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; - ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; - ps_ssel.ltf = bilinear && simple_sample; - } - else - { - ps_sel.tfx = 4; - } - - // rs - - GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - - dev->OMSetRenderTargets(rt, ds, &scissor); - dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL); - dev->PSSetShaderResource(1, tex ? 
tex->m_palette : NULL); - dev->PSSetShaderResource(2, rtcopy); - - uint8 afix = context->ALPHA.FIX; - - SetupIA(); - - dev->SetupOM(om_dssel, om_bsel, afix); - dev->SetupVS(vs_sel, &vs_cb); - dev->SetupGS(gs_sel); - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - // draw - - if(context->TEST.DoFirstPass()) - { - dev->DrawIndexedPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); - GSDeviceDX::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawIndexedPrimitive(); - dev->SetupOM(om_dssel, om_bsel, afix); - } - } - - if(context->TEST.DoSecondPass()) - { - ASSERT(!env.PABE.PABE); - - static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; - - ps_sel.atst = iatst[ps_sel.atst]; - - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - bool z = om_dssel.zwe; - bool r = om_bsel.wr; - bool g = om_bsel.wg; - bool b = om_bsel.wb; - bool a = om_bsel.wa; - - switch(context->TEST.AFAIL) - { - case 0: z = r = g = b = a = false; break; // none - case 1: z = false; break; // rgba - case 2: r = g = b = a = false; break; // z - case 3: z = a = false; break; // rgb - default: __assume(0); - } - - if(z || r || g || b || a) - { - om_dssel.zwe = z; - om_bsel.wr = r; - om_bsel.wg = g; - om_bsel.wb = b; - om_bsel.wa = a; - - dev->SetupOM(om_dssel, om_bsel, afix); - - dev->DrawIndexedPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); - GSDeviceDX::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawIndexedPrimitive(); - } - } - } - - dev->EndScene(); - - dev->Recycle(rtcopy); - - if(om_dssel.fba) UpdateFBA(rt); -} diff --git 
a/plugins/GSdx_legacy/GSRendererDX.h b/plugins/GSdx_legacy/GSRendererDX.h deleted file mode 100644 index a9071b8597..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRendererHW.h" - -class GSRendererDX : public GSRendererHW -{ - GSVector2 m_pixelcenter; - bool m_logz; - bool m_fba; - - bool UserHacks_AlphaHack; - bool UserHacks_AlphaStencil; - -protected: - virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); - virtual void SetupIA() = 0; - virtual void UpdateFBA(GSTexture* rt) {} - - unsigned int UserHacks_TCOffset; - float UserHacks_TCO_x, UserHacks_TCO_y; - -public: - GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)); - virtual ~GSRendererDX(); - -}; diff --git a/plugins/GSdx_legacy/GSRendererDX11.cpp b/plugins/GSdx_legacy/GSRendererDX11.cpp deleted file mode 100644 index 4eeb93734b..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX11.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererDX11.h" -#include "GSCrc.h" -#include "resource.h" - -GSRendererDX11::GSRendererDX11() - : GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) -{ -} - -bool GSRendererDX11::CreateDevice(GSDevice* dev) -{ - if(!__super::CreateDevice(dev)) - return false; - - return true; -} - -void GSRendererDX11::SetupIA() -{ - GSDevice11* dev = (GSDevice11*)m_dev; - - void* ptr = NULL; - - if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next)) - { - GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); - - if(UserHacks_WildHack && !isPackedUV_HackFlag) - { - GSVertex* RESTRICT d = (GSVertex*)ptr; - - for(unsigned int i = 0; i < m_vertex.next; i++) - { - if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF; - } - } - - dev->IAUnmapVertexBuffer(); - } - - dev->IASetIndexBuffer(m_index.buff, m_index.tail); - - D3D11_PRIMITIVE_TOPOLOGY t; - - switch(m_vt.m_primclass) - { - case GS_POINT_CLASS: - t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; - break; - case GS_TRIANGLE_CLASS: - t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - break; - default: - __assume(0); - } - - dev->IASetPrimitiveTopology(t); -} diff --git a/plugins/GSdx_legacy/GSRendererDX11.h b/plugins/GSdx_legacy/GSRendererDX11.h deleted file mode 100644 index 53d1021222..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX11.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRendererDX.h" -#include "GSVertexHW.h" -#include "GSTextureCache11.h" - -class GSRendererDX11 : public GSRendererDX -{ -protected: - void SetupIA(); - -public: - GSRendererDX11(); - virtual ~GSRendererDX11() {} - - bool CreateDevice(GSDevice* dev); -}; diff --git a/plugins/GSdx_legacy/GSRendererDX9.cpp b/plugins/GSdx_legacy/GSRendererDX9.cpp deleted file mode 100644 index fa078e7646..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX9.cpp +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererDX9.h" -#include "GSCrc.h" -#include "resource.h" - -GSRendererDX9::GSRendererDX9() - : GSRendererDX(new GSTextureCache9(this)) -{ -} - -bool GSRendererDX9::CreateDevice(GSDevice* dev) -{ - if(!__super::CreateDevice(dev)) - return false; - - // - - memset(&m_fba.dss, 0, sizeof(m_fba.dss)); - - m_fba.dss.StencilEnable = true; - m_fba.dss.StencilReadMask = 2; - m_fba.dss.StencilWriteMask = 2; - m_fba.dss.StencilFunc = D3DCMP_EQUAL; - m_fba.dss.StencilPassOp = D3DSTENCILOP_ZERO; - m_fba.dss.StencilFailOp = D3DSTENCILOP_ZERO; - m_fba.dss.StencilDepthFailOp = D3DSTENCILOP_ZERO; - m_fba.dss.StencilRef = 2; - - memset(&m_fba.bs, 0, sizeof(m_fba.bs)); - - m_fba.bs.RenderTargetWriteMask = D3DCOLORWRITEENABLE_ALPHA; - - // - - return true; -} - -void GSRendererDX9::SetupIA() -{ - D3DPRIMITIVETYPE topology; - - switch(m_vt.m_primclass) - { - case GS_POINT_CLASS: - - topology = D3DPT_POINTLIST; - - break; - - case GS_LINE_CLASS: - - topology = D3DPT_LINELIST; - - if(PRIM->IIP == 0) - { - for(size_t i = 0, j = m_index.tail; i < j; i += 2) - { - uint32 tmp = m_index.buff[i + 0]; - m_index.buff[i + 0] = m_index.buff[i + 1]; - m_index.buff[i + 1] = tmp; - } - } - - break; - - case GS_TRIANGLE_CLASS: - - topology = D3DPT_TRIANGLELIST; - - if(PRIM->IIP == 0) - { - for(size_t i = 0, j = m_index.tail; i < j; i += 3) - { - uint32 tmp = m_index.buff[i + 0]; - m_index.buff[i + 0] = m_index.buff[i + 2]; - m_index.buff[i + 2] = tmp; - } - } - - break; - - case GS_SPRITE_CLASS: - - topology = D3DPT_TRIANGLELIST; - - // each sprite converted to quad needs twice the space - - while(m_vertex.tail * 2 > m_vertex.maxcount) - { - GrowVertexBuffer(); - } - - // assume vertices are tightly packed and sequentially indexed (it should be the case) - - if(m_vertex.next >= 2) - { - size_t count = m_vertex.next; - - int i = (int)count * 2 - 4; - GSVertex* s = &m_vertex.buff[count - 2]; - GSVertex* q = 
&m_vertex.buff[count * 2 - 4]; - uint32* RESTRICT index = &m_index.buff[count * 3 - 6]; - - for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) - { - GSVertex v0 = s[0]; - GSVertex v1 = s[1]; - - v0.RGBAQ = v1.RGBAQ; - v0.XYZ.Z = v1.XYZ.Z; - v0.FOG = v1.FOG; - - q[0] = v0; - q[3] = v1; - - // swap x, s, u - - uint16 x = v0.XYZ.X; - v0.XYZ.X = v1.XYZ.X; - v1.XYZ.X = x; - - float s = v0.ST.S; - v0.ST.S = v1.ST.S; - v1.ST.S = s; - - uint16 u = v0.U; - v0.U = v1.U; - v1.U = u; - - q[1] = v0; - q[2] = v1; - - index[0] = i + 0; - index[1] = i + 1; - index[2] = i + 2; - index[3] = i + 1; - index[4] = i + 2; - index[5] = i + 3; - } - - m_vertex.head = m_vertex.tail = m_vertex.next = count * 2; - m_index.tail = count * 3; - } - - break; - - default: - __assume(0); - } - - GSDevice9* dev = (GSDevice9*)m_dev; - - (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO - - void* ptr = NULL; - - if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) - { - GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; - GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; - - for(uint32 i = 0; i < m_vertex.next; i++, s++, d++) - { - GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); - - if(PRIM->TME && !PRIM->FST) - { - p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); - } - else - { - p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); - } - - GSVector4 t = GSVector4::zero(); - - if(PRIM->TME) - { - if(PRIM->FST) - { - if(UserHacks_WildHack && !isPackedUV_HackFlag) - { - t = GSVector4(GSVector4i::load(s->UV & 0x3FEF3FEF).upl16()); - //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? 
"*" : ""); - } - else - { - t = GSVector4(GSVector4i::load(s->UV).upl16()); - } - } - else - { - t = GSVector4::loadl(&s->ST); - } - } - - t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); - - d->p = p; - d->t = t; - } - - dev->IAUnmapVertexBuffer(); - } - - dev->IASetIndexBuffer(m_index.buff, m_index.tail); - - dev->IASetPrimitiveTopology(topology); -} - -void GSRendererDX9::UpdateFBA(GSTexture* rt) -{ - if (!rt) - return; - - GSDevice9* dev = (GSDevice9*)m_dev; - - dev->BeginScene(); - - // om - - dev->OMSetDepthStencilState(&m_fba.dss); - dev->OMSetBlendState(&m_fba.bs, 0); - - // ia - - GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); - GSVector4 off = GSVector4(-1.0f, 1.0f); - - GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + off.xxyy()) * s.xyxy()).sat(off.zzyy()); - GSVector4 dst = src * 2.0f + off.xxxx(); - - GSVertexPT1 vertices[] = - { - {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(0, 0)}, - {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(0, 0)}, - {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(0, 0)}, - {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(0, 0)}, - }; - - dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); - dev->IASetInputLayout(dev->m_convert.il); - dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); - - // vs - - dev->VSSetShader(dev->m_convert.vs, NULL, 0); - - // ps - - dev->PSSetShader(dev->m_convert.ps[4], NULL, 0); - - // - - dev->DrawPrimitive(); - - // - - dev->EndScene(); -} diff --git a/plugins/GSdx_legacy/GSRendererDX9.h b/plugins/GSdx_legacy/GSRendererDX9.h deleted file mode 100644 index c7e3b8c8b2..0000000000 --- a/plugins/GSdx_legacy/GSRendererDX9.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either 
version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRendererDX.h" -#include "GSVertexHW.h" -#include "GSTextureCache9.h" - -class GSRendererDX9 : public GSRendererDX -{ -protected: - struct - { - Direct3DDepthStencilState9 dss; - Direct3DBlendState9 bs; - } m_fba; - - void SetupIA(); - void UpdateFBA(GSTexture* rt); - -public: - GSRendererDX9(); - virtual ~GSRendererDX9() {} - - bool CreateDevice(GSDevice* dev); -}; diff --git a/plugins/GSdx_legacy/GSRendererHW.cpp b/plugins/GSdx_legacy/GSRendererHW.cpp deleted file mode 100644 index d4733bea24..0000000000 --- a/plugins/GSdx_legacy/GSRendererHW.cpp +++ /dev/null @@ -1,1393 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererHW.h" - -GSRendererHW::GSRendererHW(GSTextureCache* tc) - : m_width(1280) - , m_height(1024) - , m_skip(0) - , m_reset(false) - , m_upscale_multiplier(1) - , m_tc(tc) -{ - m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1); - m_userhacks_skipdraw = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SkipDraw", 0) : 0; - m_userhacks_align_sprite_X = !!theApp.GetConfig("UserHacks_align_sprite_X", 0) && !!theApp.GetConfig("UserHacks", 0); - m_userhacks_round_sprite_offset = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_round_sprite_offset", 0) : 0; - m_userhacks_disable_gs_mem_clear = theApp.GetConfig("UserHacks_DisableGsMemClear", 0) && theApp.GetConfig("UserHacks", 0); - - if (!m_upscale_multiplier) { //Custom Resolution - m_width = theApp.GetConfig("resx", m_width); - m_height = theApp.GetConfig("resy", m_height); - } - - if (m_upscale_multiplier == 1) { // hacks are only needed for upscaling issues. - m_userhacks_round_sprite_offset = 0; - m_userhacks_align_sprite_X = 0; - } - -} - -void GSRendererHW::SetScaling() -{ - GSVector2i crtc_size(GetDisplayRect().width(), GetDisplayRect().height()); - - // Framebuffer width is always a multiple of 64 so at certain cases it can't cover some weird width values. - // 480P , 576P use width as 720 which is not referencable by FBW * 64. so it produces 704 ( the closest value multiple by 64). - // In such cases, let's just use the CRTC width. - int fb_width = max({ (int)m_context->FRAME.FBW * 64, crtc_size.x , 512 }); - // GS doesn't have a specific register for the FrameBuffer height. so we get the height - // from physical units of the display rectangle in case the game uses a heigher value of height. - int fb_height = (fb_width < 1024) ? 
max(512, crtc_size.y) : 1024; - - int upscaled_fb_w = fb_width * m_upscale_multiplier; - int upscaled_fb_h = fb_height * m_upscale_multiplier; - bool good_rt_size = m_width >= upscaled_fb_w && m_height >= upscaled_fb_h; - - // No need to resize for native/custom resolutions as default size will be enough for native and we manually get RT Buffer size for custom. - // don't resize until the display rectangle and register states are stabilized. - if ( m_upscale_multiplier <= 1 || good_rt_size) - return; - - m_tc->RemovePartial(); - m_width = upscaled_fb_w; - m_height = upscaled_fb_h; - printf("Frame buffer size set to %dx%d (%dx%d)\n", fb_width, fb_height , m_width, m_height); -} - -GSRendererHW::~GSRendererHW() -{ - delete m_tc; -} - -void GSRendererHW::SetGameCRC(uint32 crc, int options) -{ - GSRenderer::SetGameCRC(crc, options); - - m_hacks.SetGameCRC(m_game); -} - -bool GSRendererHW::CanUpscale() -{ - if(m_hacks.m_cu && !(this->*m_hacks.m_cu)()) - { - return false; - } - - return m_upscale_multiplier!=1 && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) -} - -int GSRendererHW::GetUpscaleMultiplier() -{ - // Custom resolution (currently 0) needs an upscale multiplier of 1. - return m_upscale_multiplier ? 
m_upscale_multiplier : 1; -} - -GSVector2i GSRendererHW::GetInternalResolution() { - GSVector2i dr(GetDisplayRect().width(), GetDisplayRect().height()); - - if (m_upscale_multiplier) - return GSVector2i(dr.x * m_upscale_multiplier, dr.y * m_upscale_multiplier); - else - return GSVector2i(m_width, m_height); -} - -void GSRendererHW::Reset() -{ - // TODO: GSreset can come from the main thread too => crash - // m_tc->RemoveAll(); - - m_reset = true; - - GSRenderer::Reset(); -} - -void GSRendererHW::VSync(int field) -{ - //Check if the frame buffer width or display width has changed - SetScaling(); - - if(m_reset) - { - m_tc->RemoveAll(); - - m_reset = false; - } - - GSRenderer::VSync(field); - - m_tc->IncAge(); - - m_tc->PrintMemoryUsage(); - m_dev->PrintMemoryUsage(); - - m_skip = 0; -} - -void GSRendererHW::ResetDevice() -{ - m_tc->RemoveAll(); - - GSRenderer::ResetDevice(); -} - -GSTexture* GSRendererHW::GetOutput(int i) -{ - const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = DISPFB.Block(); - TEX0.TBW = DISPFB.FBW; - TEX0.PSM = DISPFB.PSM; - - // TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM); - - GSTexture* t = NULL; - - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFrameRect(i).bottom)) - { - t = rt->m_texture; - -#ifndef NDEBUG - if(s_dump) - { - if(s_savef && s_n >= s_saven) - { - t->Save(root_hw + format("%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM)); - } - } - - s_n++; -#endif - } - - return t; -} - -void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) -{ - // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM); - - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r); -} - -void 
GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) -{ - // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); - - if(clut) return; // FIXME - - m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); -} - -int GSRendererHW::Interpolate_UV(float alpha, int t0, int t1) -{ - float t = (1.0f - alpha) * t0 + alpha * t1; - return (int)t & ~0xF; // cheap rounding -} - -float GSRendererHW::alpha0(int L, int X0, int X1) -{ - float x = (X0 + 15) & ~0xF; // Round up - return (x - X0) / (float)L; -} - -float GSRendererHW::alpha1(int L, int X0, int X1) -{ - float x = (X1 - 1) & ~0xF; // Round down. Note -1 because right pixel isn't included in primitive so 0x100 must return 0. - return (x - X0) / (float)L; -} - -template -void GSRendererHW::RoundSpriteOffset() -{ -//#define DEBUG_U -//#define DEBUG_V -#if defined(DEBUG_V) || defined(DEBUG_U) - bool debug = linear; -#endif - size_t count = m_vertex.next; - GSVertex* v = &m_vertex.buff[0]; - - for(size_t i = 0; i < count; i += 2) { - // Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector) - - // Compute the coordinate of first and last texels (in native with a linear filtering) - int ox = m_context->XYOFFSET.OFX; - int X0 = v[i].XYZ.X - ox; - int X1 = v[i+1].XYZ.X - ox; - int Lx = (v[i+1].XYZ.X - v[i].XYZ.X); - float ax0 = alpha0(Lx, X0, X1); - float ax1 = alpha1(Lx, X0, X1); - int tx0 = Interpolate_UV(ax0, v[i].U, v[i+1].U); - int tx1 = Interpolate_UV(ax1, v[i].U, v[i+1].U); -#ifdef DEBUG_U - if (debug) { - fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i+1].U); - fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1); - fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1); - } -#endif - - int oy = m_context->XYOFFSET.OFY; - int Y0 = v[i].XYZ.Y - oy; - int Y1 = v[i+1].XYZ.Y - oy; - int Ly = (v[i+1].XYZ.Y - 
v[i].XYZ.Y); - float ay0 = alpha0(Ly, Y0, Y1); - float ay1 = alpha1(Ly, Y0, Y1); - int ty0 = Interpolate_UV(ay0, v[i].V, v[i+1].V); - int ty1 = Interpolate_UV(ay1, v[i].V, v[i+1].V); -#ifdef DEBUG_V - if (debug) { - fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i+1].V); - fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1); - fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1); - } -#endif - -#ifdef DEBUG_U - if (debug) - fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].U, v[i+1].U); -#endif -#ifdef DEBUG_V - if (debug) - fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].V, v[i+1].V); -#endif - -#if 1 - // Use rounded value of the newly computed texture coordinate. It ensures - // that sampling will remains inside texture boundary - // - // Note for bilinear: by definition it will never work correctly! A sligh modification - // of interpolation migth trigger a discard (with alpha testing) - // Let's use something simple that correct really bad case (for a couple of 2D games). - // I hope it won't create too much glitches. 
- if (linear) { - int Lu = v[i+1].U - v[i].U; - // Note 32 is based on taisho-mononoke - if ((Lu > 0) && (Lu <= (Lx+32))) { - v[i+1].U -= 8; - } - } else { - if (tx0 <= tx1) { - v[i].U = tx0; - v[i+1].U = tx1 + 16; - } else { - v[i].U = tx0 + 15; - v[i+1].U = tx1; - } - } -#endif -#if 1 - if (linear) { - int Lv = v[i+1].V - v[i].V; - if ((Lv > 0) && (Lv <= (Ly+32))) { - v[i+1].V -= 8; - } - } else { - if (ty0 <= ty1) { - v[i].V = ty0; - v[i+1].V = ty1 + 16; - } else { - v[i].V = ty0 + 15; - v[i+1].V = ty1; - } - } -#endif - -#ifdef DEBUG_U - if (debug) - fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].U, v[i+1].U); -#endif -#ifdef DEBUG_V - if (debug) - fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i+1].V); -#endif - - } -} - -void GSRendererHW::Draw() -{ - if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) { - GL_INS("Warning skipping a draw call (%d)", s_n); - s_n += 3; // Keep it sync with SW renderer - return; - } - GL_PUSH("HW Draw %d", s_n); - - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - // It is allowed to use the depth and rt at the same location. However at least 1 must - // be disabled. - // 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha) - // 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth - // 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth - // Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0 - const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1)); - const bool no_ds = !no_rt && ( - // Depth is always pass (no read) and write are discarded (tekken 5). 
(Note: DATE is currently implemented with a stencil buffer) - (context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) || - // Depth will be written through the RT - (context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE) - ); - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = context->FRAME.Block(); - TEX0.TBW = context->FRAME.FBW; - TEX0.PSM = context->FRAME.PSM; - - GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); - GSTexture* rt_tex = rt ? rt->m_texture : NULL; - - TEX0.TBP0 = context->ZBUF.Block(); - TEX0.TBW = context->FRAME.FBW; - TEX0.PSM = context->ZBUF.PSM; - - GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); - GSTexture* ds_tex = ds ? ds->m_texture : NULL; - - if(!(rt || no_rt) || !(ds || no_ds)) - { - GL_POP(); - ASSERT(0); - return; - } - - GSTextureCache::Source* tex = NULL; - m_texture_shuffle = false; - - if(PRIM->TME) - { - /* - - // m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out - - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - } - - */ - - GSVector4i r; - - GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); - - tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); - - if(!tex) { - GL_POP(); - return; - } - - // FIXME: Could be removed on openGL - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - } - - // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. - // Initially code also tested the RT but it gives too much false-positive - // - // Both input and output are 16 bits and texture was initially 32 bits! 
- m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt; - - // Texture shuffle is not yet supported with strange clamp mode - ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); - } - if (rt) { - // Be sure texture shuffle detection is properly propagated - // Otherwise set or clear the flag (Code in texture cache only set the flag) - // Note: it is important to clear the flag when RT is used as a real 16 bits target. - rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2); - } - -#ifndef NDEBUG - if(s_dump) - { - uint64 frame = m_perfmon.GetFrame(); - - string s; - - if (s_n >= s_saven) { - // Dump Register state - s = format("%05d_context.txt", s_n); - - m_env.Dump(root_hw+s); - m_context->Dump(root_hw+s); - } - - if(s_savet && s_n >= s_saven && tex) - { - s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", - s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, - (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, - (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, - (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); - - tex->m_texture->Save(root_hw+s, false, true); - - if(tex->m_palette) - { - s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); - - tex->m_palette->Save(root_hw+s, false, true); - } - } - - s_n++; - - if(s_save && s_n >= s_saven) - { - s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - - if (rt) - rt->m_texture->Save(root_hw+s); - } - - if(s_savez && s_n >= s_saven) - { - s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); - - if (ds_tex) - ds_tex->Save(root_hw+s); - } - - s_n++; - - } else { - s_n += 2; - } -#endif - - if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex)) - { - s_n += 1; // keep counter sync - GL_POP(); - return; - } - - if 
(!m_userhacks_disable_gs_mem_clear) { - OI_GsMemClear(); - } - - // skip alpha test if possible - - GIFRegTEST TEST = context->TEST; - GIFRegFRAME FRAME = context->FRAME; - GIFRegZBUF ZBUF = context->ZBUF; - - uint32 fm = context->FRAME.FBMSK; - uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; - - if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) - { - if(GSRenderer::TryAlphaTest(fm, zm)) - { - context->TEST.ATST = ATST_ALWAYS; - } - } - - context->FRAME.FBMSK = fm; - context->ZBUF.ZMSK = zm != 0; - - // A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite - if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { - size_t count = m_vertex.next; - GSVertex* v = &m_vertex.buff[0]; - - // Hack to avoid vertical black line in various games (ace combat/tekken) - if (m_userhacks_align_sprite_X) { - // Note for performance reason I do the check only once on the first - // primitive - int win_position = v[1].XYZ.X - context->XYOFFSET.OFX; - const bool unaligned_position = ((win_position & 0xF) == 8); - const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful - const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X); - if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) { - // Normaly vertex are aligned on full pixels and texture in half - // pixels. Let's extend the coverage of an half-pixel to avoid - // hole after upscaling - for(size_t i = 0; i < count; i += 2) { - v[i+1].XYZ.X += 8; - // I really don't know if it is a good idea. 
Neither what to do for !PRIM->FST - if (unaligned_texture) - v[i+1].U += 8; - } - } - } - - if (PRIM->FST) { - if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) { - if (m_vt.IsLinear()) - RoundSpriteOffset(); - else - RoundSpriteOffset(); - } - } else { - ; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior) - } - } - - // - - DrawPrims(rt_tex, ds_tex, tex); - - // - - context->TEST = TEST; - context->FRAME = FRAME; - context->ZBUF = ZBUF; - - // - - GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); - - // Help to detect rendering outside of the framebuffer -#if _DEBUG - if (m_upscale_multiplier * r.z > m_width) { - GL_INS("ERROR: RT width is too small only %d but require %d", m_width, m_upscale_multiplier * r.z); - } - if (m_upscale_multiplier * r.w > m_height) { - GL_INS("ERROR: RT height is too small only %d but require %d", m_height, m_upscale_multiplier * r.w); - } -#endif - - if(fm != 0xffffffff && rt) - { - rt->m_valid = rt->m_valid.runion(r); - - m_tc->InvalidateVideoMem(context->offset.fb, r, false); - - m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); - } - - if(zm != 0xffffffff && ds) - { - ds->m_valid = ds->m_valid.runion(r); - - m_tc->InvalidateVideoMem(context->offset.zb, r, false); - - m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); - } - - // - - if(m_hacks.m_oo) - { - (this->*m_hacks.m_oo)(); - } - -#ifndef NDEBUG - if(s_dump) - { - uint64 frame = m_perfmon.GetFrame(); - - string s; - - if(s_save && s_n >= s_saven) - { - s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - - if (rt) - rt->m_texture->Save(root_hw+s); - } - - if(s_savez && s_n >= s_saven) - { - s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); - - if (ds_tex) - ds_tex->Save(root_hw+s); - } - - 
s_n++; - - if(s_savel > 0 && (s_n - s_saven) > s_savel) - { - s_dump = 0; - } - } else { - s_n += 1; - } -#endif - - #ifdef DISABLE_HW_TEXTURE_CACHE - - if (rt) - m_tc->Read(rt, r); - - #endif - - GL_POP(); -} - -// hacks - -GSRendererHW::Hacks::Hacks() - : m_oi_map(m_oi_list) - , m_oo_map(m_oo_list) - , m_cu_map(m_cu_list) - , m_oi(NULL) - , m_oo(NULL) - , m_cu(NULL) -{ - m_oi_list.push_back(HackEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII)); - m_oi_list.push_back(HackEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX)); - m_oi_list.push_back(HackEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6)); - m_oi_list.push_back(HackEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2)); - m_oi_list.push_back(HackEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame)); - m_oi_list.push_back(HackEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden)); - m_oi_list.push_back(HackEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS)); - m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); - m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); - m_oi_list.push_back(HackEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2)); - m_oi_list.push_back(HackEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed)); - m_oi_list.push_back(HackEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown)); - m_oi_list.push_back(HackEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine)); - m_oi_list.push_back(HackEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts)); - m_oi_list.push_back(HackEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning)); - 
m_oi_list.push_back(HackEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight)); - m_oi_list.push_back(HackEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia)); - m_oi_list.push_back(HackEntry(CRC::SMTNocturne, CRC::RegionCount, &GSRendererHW::OI_SMTNocturne)); - m_oi_list.push_back(HackEntry(CRC::SuperManReturns, CRC::RegionCount, &GSRendererHW::OI_SuperManReturns)); - - m_oo_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2)); - m_oo_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2)); - - m_cu_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2)); - m_cu_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2)); - m_cu_list.push_back(HackEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss)); -} - -void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game) -{ - uint32 hash = (uint32)((game.region << 24) | game.title); - - m_oi = m_oi_map[hash]; - m_oo = m_oo_map[hash]; - m_cu = m_cu_map[hash]; - - if (game.flags & CRC::PointListPalette) { - ASSERT(m_oi == NULL); - - m_oi = &GSRendererHW::OI_PointListPalette; - } - - bool hack = theApp.GetConfig("UserHacks_ColorDepthClearOverlap", 0) && theApp.GetConfig("UserHacks", 0); - if (hack && !m_oi) { - // FIXME: Enable this code in the future. I think it could replace - // most of the "old" OI hack. So far code was tested on GoW2 & SimpsonsGame with - // success - m_oi = &GSRendererHW::OI_DoubleHalfClear; - } -} - -bool GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - if ((m_vt.m_primclass == GS_SPRITE_CLASS) && !PRIM->TME && !m_context->ZBUF.ZMSK && (m_context->FRAME.FBW >= 7) && rt) { - GSVertex* v = &m_vertex.buff[0]; - - //GL_INS("OI_DoubleHalfClear: psm:%x. 
Z:%d R:%d G:%d B:%d A:%d", m_context->FRAME.PSM, - // v[1].XYZ.Z, v[1].RGBAQ.R, v[1].RGBAQ.G, v[1].RGBAQ.B, v[1].RGBAQ.A); - - // Check it is a clear on the first primitive only - if (v[1].XYZ.Z || v[1].RGBAQ.R || v[1].RGBAQ.G || v[1].RGBAQ.B || v[1].RGBAQ.A) { - return true; - } - // Only 32 bits format is supported otherwise it is complicated - if (m_context->FRAME.PSM & 2) - return true; - - // FIXME might need some rounding - // In 32 bits pages are 64x32 pixels. In theory, it must be somethings - // like FBW * 64 pixels * ratio / 32 pixels / 2 = FBW * ratio - // It is hard to predict the ratio, so I round it to 1. And I use - // <= comparison below. - uint32 h_pages = m_context->FRAME.FBW; - - uint32 base; - uint32 half; - if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { - base = m_context->ZBUF.ZBP; - half = m_context->FRAME.FBP; - } else { - base = m_context->FRAME.FBP; - half = m_context->ZBUF.ZBP; - } - - if (half <= (base + h_pages * m_context->FRAME.FBW)) { - //GL_INS("OI_DoubleHalfClear: base %x half %x. h_pages %d fbw %d", base, half, h_pages, m_context->FRAME.FBW); - if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { - m_dev->ClearDepth(ds, 0); - } else { - m_dev->ClearRenderTarget(rt, 0); - } - // Don't return false, it will break the rendering. 
I guess that it misses texture - // invalidation - //return false; - } - } - return true; -} - -// Note: hack is safe, but it could impact the perf a little (normally games do only a couple of clear by frame) -void GSRendererHW::OI_GsMemClear() -{ - // Rectangle draw without texture - if ((m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && !PRIM->TME && !PRIM->ABE // Direct write - && !m_context->TEST.ATE // no alpha test - && (!m_context->TEST.ZTE || m_context->TEST.ZTST == ZTST_ALWAYS) // no depth test - && (m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(0))) // Constant 0 write - ) { - GL_INS("OI_GsMemClear"); - GSOffset* off = m_context->offset.fb; - GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); - - int format = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt; - - if (format == 0) { - // Based on WritePixel32 - for(int y = r.top; y < r.bottom; y++) - { - uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] = 0; // Here the constant color - } - } - } else if (format == 1) { - // Based on WritePixel24 - for(int y = r.top; y < r.bottom; y++) - { - uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] &= 0xff000000; // Clear the color - } - } - } else if (format == 2) { - ; // Hack is used for FMV which are likely 24/32 bits. Let's keep the for reference -#if 0 - // Based on WritePixel16 - for(int y = r.top; y < r.bottom; y++) - { - uint32* RESTRICT d = &m_mem.m_vm16[off->pixel.row[y]]; - int* RESTRICT col = off->pixel.col[0]; - - for(int x = r.left; x < r.right; x++) - { - d[col[x]] = 0; // Here the constant color - } - } -#endif - } - } -} - -// OI (others input?/implementation?) 
hacks replace current draw call - -bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - static uint32* video = NULL; - static size_t lines = 0; - - if(lines == 0) - { - if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2)) - { - lines = m_vertex.next / 2; - } - } - else - { - if(m_vt.m_primclass == GS_POINT_CLASS) - { - if(m_vertex.next >= 16 * 512) - { - // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 - - if(!video) video = new uint32[512 * 512]; - - int ox = m_context->XYOFFSET.OFX - 8; - int oy = m_context->XYOFFSET.OFY - 8; - - const GSVertex* RESTRICT v = m_vertex.buff; - - for(int i = (int)m_vertex.next; i > 0; i--, v++) - { - int x = (v->XYZ.X - ox) >> 4; - int y = (v->XYZ.Y - oy) >> 4; - - if (x < 0 || x >= 448 || y < 0 || y >= (int)lines) return false; // le sigh - - video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0]; - } - - return false; - } - else - { - lines = 0; - } - } - else if(m_vt.m_primclass == GS_LINE_CLASS) - { - if(m_vertex.next == lines * 2) - { - // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, - // but we use the stored video data to create a new texture, and replace the lines with two triangles - - m_dev->Recycle(t->m_texture); - - t->m_texture = m_dev->CreateTexture(512, 512); - - t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); - - m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2]; - m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1]; - - m_index.buff[0] = 0; - m_index.buff[1] = 1; - m_index.buff[2] = 2; - m_index.buff[3] = 1; - m_index.buff[4] = 2; - m_index.buff[5] = 3; - - m_vertex.head = m_vertex.tail = m_vertex.next = 4; - m_index.tail = 6; - - m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS); - } - else - { - lines = 0; - } - } - } - - return true; -} - -bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* 
ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 ZBP = m_context->ZBUF.Block(); - uint32 TBP = m_context->TEX0.TBP0; - - if((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S) - { - // random battle transition (z buffer written directly, clear it now) - - m_dev->ClearDepth(ds, 0); - } - - return true; -} - -bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - // missing red channel fix (looks alright in pcsx2 r5000+) - - GSVertex* RESTRICT v = m_vertex.buff; - - for(int i = (int)m_vertex.next; i > 0; i--, v++) - { - uint32 c = v->RGBAQ.u32[0]; - - uint32 r = (c >> 0) & 0xff; - uint32 g = (c >> 8) & 0xff; - uint32 b = (c >> 16) & 0xff; - - if(r == 0 && g != 0 && b != 0) - { - v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1); - } - } - - m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt.m_primclass); - - return true; -} - -bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x00f00 || FBP == 0x00100 || FBP == 0x01280) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280 - { - // z buffer clear - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = FBP; - TEX0.TBW = FBW; - TEX0.PSM = FPSM; - - if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) - { - m_dev->ClearDepth(ds->m_texture, 0); - } - - return false; - } - - return true; -} - -bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc - { - // instead of just simply drawing a full height 512x512 sprite to clear the z 
buffer, - // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros, - // how? by using a render target that overlaps with the lower half of the z buffer... - - // TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully - - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; -} - -bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - if(!PRIM->TME) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 ZBP = m_context->ZBUF.Block(); - - if(FBP == 0x008c0 && ZBP == 0x01a40) - { - // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = ZBP; - TEX0.TBW = m_context->FRAME.FBW; - TEX0.PSM = m_context->FRAME.PSM; - - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true)) - { - m_dev->ClearRenderTarget(rt->m_texture, 0); - } - - return false; - } - else if(FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180) - { - // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?) 
- - GIFRegTEX0 TEX0; - - TEX0.TBP0 = FBP; - TEX0.TBW = m_context->FRAME.FBW; - TEX0.PSM = m_context->ZBUF.PSM; - - if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) - { - m_dev->ClearDepth(ds->m_texture, 0); - } - - return false; - } - } - - return true; -} - -bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x025a0 || FBP == 0x02800) && FPSM == PSM_PSMCT32) //0x2800 pal, 0x25a0 ntsc - { - //only top half of the screen clears - m_dev->ClearDepth(ds, 0); - } - - return true; -} - -bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; -} - -bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if((FBP == 0x02300 || FBP == 0x03fc0) && FPSM == PSM_PSMCT32) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - } - } - - return true; -} - -bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x02000 && FPSM == PSM_PSMZ24) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; -} - -bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) 
- { - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_max.p.z == 0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; -} - -bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x0 && FPSM == PSM_PSMCT16) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - } - - return true; -} - -bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x02300 && FPSM == PSM_PSMZ24) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; -} - -bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && (FBP == 0x02800 || FBP == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; -} - -bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; -} - -bool 
GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_eq.z) - { - m_context->TEST.ZTST = ZTST_ALWAYS; - //m_dev->ClearDepth(ds, 0); - } - - return true; -} - -bool GSRendererHW::OI_SMTNocturne(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - uint32 FBMSK = m_context->FRAME.FBMSK; - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - uint32 FPSM = m_context->FRAME.PSM; - - if(FBMSK == 16777215 && m_vertex.head != 2 && m_vertex.tail != 4 && m_vertex.next != 4) - { - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = FBP; - TEX0.TBW = FBW; - TEX0.PSM = FPSM; - if (GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) - { - m_dev->ClearDepth(ds->m_texture, 0); - } - return false; - } - - return true; -} - -bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - - if(FBP >= 0x03f40 && (FBP & 0x1f) == 0) - { - if(m_vertex.next == 16) - { - GSVertex* RESTRICT v = m_vertex.buff; - - for(int i = 0; i < 16; i++, v++) - { - uint32 c = v->RGBAQ.u32[0]; - uint32 a = c >> 24; - - c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff); - - v->RGBAQ.u32[0] = c; - - m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW); - } - - m_mem.m_clut.Invalidate(); - - return false; - } - else if(m_vertex.next == 256) - { - GSVertex* RESTRICT v = m_vertex.buff; - - for(int i = 0; i < 256; i++, v++) - { - uint32 c = v->RGBAQ.u32[0]; - uint32 a = c >> 24; - - c = (a >= 0x80 ? 
0xff000000 : (a << 25)) | (c & 0x00ffffff); - - v->RGBAQ.u32[0] = c; - - m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW); - } - - m_mem.m_clut.Invalidate(); - - return false; - } - else - { - ASSERT(0); - } - } - } - - return true; -} - -bool GSRendererHW::OI_SuperManReturns(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) -{ - // Instead to use a fullscreen rectangle they use a 32 pixels, 4096 pixels with a FBW of 1. - // Technically the FB wrap/overlap on itself... - GSDrawingContext* ctx = m_context; - GSVertex* v = &m_vertex.buff[0]; - - if (!(ctx->FRAME.FBP == ctx->ZBUF.ZBP && !PRIM->TME && !ctx->ZBUF.ZMSK && !ctx->FRAME.FBMSK && m_vt.m_eq.rgba == 0xFFFF)) - return true; - - // Please kill those crazy devs! - ASSERT(m_vertex.next == 2); - ASSERT(m_vt.m_primclass == GS_SPRITE_CLASS); - ASSERT((v->RGBAQ.A << 24 | v->RGBAQ.B << 16 | v->RGBAQ.G << 8 | v->RGBAQ.R) == (int)v->XYZ.Z); - - // Do a direct write - m_dev->ClearRenderTarget(rt, GSVector4(m_vt.m_min.c)); - - m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, ctx->FRAME.Block()); - - return false; -} - - -// OO (others output?) 
hacks: invalidate extra local memory after the draw call - -void GSRendererHW::OO_DBZBT2() -{ - // palette readback (cannot detect yet, when fetching the texture later) - - uint32 FBP = m_context->FRAME.Block(); - uint32 TBP0 = m_context->TEX0.TBP0; - - if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40)) - { - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = FBP; - BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = PSM_PSMCT32; - - InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64)); - } -} - -void GSRendererHW::OO_MajokkoALaMode2() -{ - // palette readback - - uint32 FBP = m_context->FRAME.Block(); - - if(!PRIM->TME && FBP == 0x03f40) - { - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = FBP; - BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = PSM_PSMCT32; - - InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16)); - } -} - -// Can Upscale hacks: disable upscaling for some draw calls - -bool GSRendererHW::CU_DBZBT2() -{ - // palette should stay 64 x 64 - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x03c00 && FBP != 0x03ac0; -} - -bool GSRendererHW::CU_MajokkoALaMode2() -{ - // palette should stay 16 x 16 - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x03f40; -} - -bool GSRendererHW::CU_TalesOfAbyss() -{ - // full image blur and brightening - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0; -} diff --git a/plugins/GSdx_legacy/GSRendererHW.h b/plugins/GSdx_legacy/GSRendererHW.h deleted file mode 100644 index 83272e7899..0000000000 --- a/plugins/GSdx_legacy/GSRendererHW.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" -#include "GSTextureCache.h" -#include "GSCrc.h" -#include "GSFunctionMap.h" -#include "GSState.h" - -class GSRendererHW : public GSRenderer -{ -private: - int m_width; - int m_height; - int m_skip; - bool m_reset; - int m_upscale_multiplier; - int m_userhacks_skipdraw; - - bool m_userhacks_align_sprite_X; - bool m_userhacks_disable_gs_mem_clear; - - #pragma region hacks - - typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - typedef void (GSRendererHW::*OO_Ptr)(); - typedef bool (GSRendererHW::*CU_Ptr)(); - - // Require special argument - void OI_GsMemClear(); // always on - - bool OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* 
ds, GSTextureCache::Source* t); - bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SMTNocturne(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - bool OI_SuperManReturns(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - void OO_DBZBT2(); - void OO_MajokkoALaMode2(); - - bool CU_DBZBT2(); - bool CU_MajokkoALaMode2(); - bool CU_TalesOfAbyss(); - - class Hacks - { - template class HackEntry - { - public: - CRC::Title title; - CRC::Region region; - T func; - - HackEntry(CRC::Title t, CRC::Region r, T f) - { - title = t; - region = r; - func = f; - } - }; - - template class FunctionMap : public GSFunctionMap - { - list >& m_tbl; - - T GetDefaultFunction(uint32 key) - { - CRC::Title title = (CRC::Title)(key & 0xffffff); - CRC::Region region = (CRC::Region)(key >> 24); - - for(typename list >::iterator i = m_tbl.begin(); i != m_tbl.end(); i++) - { - if(i->title == title && (i->region == CRC::RegionCount || i->region == region)) - { - return i->func; - } - } - - return NULL; - } - - public: - FunctionMap(list >& tbl) : m_tbl(tbl) {} - }; - - list > m_oi_list; - list > m_oo_list; - list > m_cu_list; - - FunctionMap m_oi_map; - FunctionMap m_oo_map; - FunctionMap m_cu_map; - - public: - OI_Ptr m_oi; - OO_Ptr m_oo; - CU_Ptr m_cu; - - Hacks(); - - void SetGameCRC(const CRC::Game& game); - - } m_hacks; - - #pragma 
endregion - - int Interpolate_UV(float alpha, int t0, int t1); - float alpha0(int L, int X0, int X1); - float alpha1(int L, int X0, int X1); - - template void RoundSpriteOffset(); - -protected: - GSTextureCache* m_tc; - - virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0; - - int m_userhacks_round_sprite_offset; - -public: - GSRendererHW(GSTextureCache* tc); - virtual ~GSRendererHW(); - - void SetGameCRC(uint32 crc, int options); - bool CanUpscale(); - int GetUpscaleMultiplier(); - virtual GSVector2i GetInternalResolution(); - void SetScaling(); - - void Reset(); - void VSync(int field); - void ResetDevice(); - GSTexture* GetOutput(int i); - void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - void Draw(); -}; diff --git a/plugins/GSdx_legacy/GSRendererNull.cpp b/plugins/GSdx_legacy/GSRendererNull.cpp deleted file mode 100644 index 19b8e88471..0000000000 --- a/plugins/GSdx_legacy/GSRendererNull.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererNull.h" diff --git a/plugins/GSdx_legacy/GSRendererNull.h b/plugins/GSdx_legacy/GSRendererNull.h deleted file mode 100644 index f7f26f9941..0000000000 --- a/plugins/GSdx_legacy/GSRendererNull.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" - -class GSRendererNull : public GSRenderer -{ - class GSVertexTraceNull : public GSVertexTrace - { - public: - GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {} - }; - -protected: - void Draw() - { - } - - GSTexture* GetOutput(int i) - { - return NULL; - } - -public: - GSRendererNull() - : GSRenderer() - { - } -}; diff --git a/plugins/GSdx_legacy/GSRendererOGL.cpp b/plugins/GSdx_legacy/GSRendererOGL.cpp deleted file mode 100644 index 84afbc66eb..0000000000 --- a/plugins/GSdx_legacy/GSRendererOGL.cpp +++ /dev/null @@ -1,1157 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererOGL.h" -#include "GSRenderer.h" - - -GSRendererOGL::GSRendererOGL() - : GSRendererHW(new GSTextureCacheOGL(this)) -{ - m_accurate_date = theApp.GetConfig("accurate_date", 0); - - m_sw_blending = theApp.GetConfig("accurate_blending_unit", 1); - - // Hope nothing requires too many draw calls. 
- m_drawlist.reserve(2048); - - UserHacks_TCOffset = theApp.GetConfig("UserHacks_TCOffset", 0); - UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; - UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f; - UserHacks_safe_fbmask = theApp.GetConfig("UserHacks_safe_fbmask", false); - - m_prim_overlap = PRIM_OVERLAP_UNKNOW; - m_unsafe_fbmask = false; - - if (!theApp.GetConfig("UserHacks", 0)) { - UserHacks_TCOffset = 0; - UserHacks_TCO_x = 0; - UserHacks_TCO_y = 0; - UserHacks_safe_fbmask = false; - } -} - -bool GSRendererOGL::CreateDevice(GSDevice* dev) -{ - if (!GSRenderer::CreateDevice(dev)) - return false; - - // No sw blending if not supported (Intel GPU) - if (!GLLoader::found_GL_ARB_texture_barrier) { - fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. You can't emulate correctly the GS blending unit! Sorry!\n"); - m_accurate_date = false; - m_sw_blending = 0; - } - - - return true; -} - -void GSRendererOGL::EmulateGS() -{ - if (m_vt.m_primclass != GS_SPRITE_CLASS) return; - - // each sprite converted to quad needs twice the space - - while(m_vertex.tail * 2 > m_vertex.maxcount) - { - GrowVertexBuffer(); - } - - // assume vertices are tightly packed and sequentially indexed (it should be the case) - - if (m_vertex.next >= 2) - { - size_t count = m_vertex.next; - - int i = (int)count * 2 - 4; - GSVertex* s = &m_vertex.buff[count - 2]; - GSVertex* q = &m_vertex.buff[count * 2 - 4]; - uint32* RESTRICT index = &m_index.buff[count * 3 - 6]; - - for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) - { - GSVertex v0 = s[0]; - GSVertex v1 = s[1]; - - v0.RGBAQ = v1.RGBAQ; - v0.XYZ.Z = v1.XYZ.Z; - v0.FOG = v1.FOG; - - q[0] = v0; - q[3] = v1; - - // swap x, s, u - - uint16 x = v0.XYZ.X; - v0.XYZ.X = v1.XYZ.X; - v1.XYZ.X = x; - - float s = v0.ST.S; - v0.ST.S = v1.ST.S; - v1.ST.S = s; - - uint16 u = v0.U; - v0.U = v1.U; - v1.U = u; - - q[1] = v0; - q[2] = v1; - - index[0] = i + 0; - index[1] = i + 1; - index[2] = i + 2; - 
index[3] = i + 1; - index[4] = i + 2; - index[5] = i + 3; - } - - m_vertex.head = m_vertex.tail = m_vertex.next = count * 2; - m_index.tail = count * 3; - } -} - -void GSRendererOGL::SetupIA() -{ - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - - if (!GLLoader::found_geometry_shader) - EmulateGS(); - - dev->IASetVertexBuffer(m_vertex.buff, m_vertex.next); - dev->IASetIndexBuffer(m_index.buff, m_index.tail); - - GLenum t = 0; - - switch(m_vt.m_primclass) - { - case GS_POINT_CLASS: - t = GL_POINTS; - break; - case GS_LINE_CLASS: - t = GL_LINES; - break; - case GS_SPRITE_CLASS: - if (GLLoader::found_geometry_shader) - t = GL_LINES; - else - t = GL_TRIANGLES; - break; - case GS_TRIANGLE_CLASS: - t = GL_TRIANGLES; - break; - default: - __assume(0); - } - - dev->IASetPrimitiveTopology(t); -} - -bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel) -{ - bool require_barrier = false; - - if (m_texture_shuffle) { - ps_sel.shuffle = 1; - ps_sel.dfmt = 0; - - const GIFRegXYOFFSET& o = m_context->XYOFFSET; - GSVertex* v = &m_vertex.buff[0]; - size_t count = m_vertex.next; - - // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors - int pos = (v[0].XYZ.X - o.OFX) & 0xFF; - bool write_ba = (pos > 112 && pos < 136); - // Read texture is 8 to 16 pixels (same as above) - float tw = (float)(1u << m_context->TEX0.TW); - int tex_pos = (PRIM->FST) ? v[0].U : tw * v[0].ST.S; - tex_pos &= 0xFF; - ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); - - // Convert the vertex info to a 32 bits color format equivalent - if (PRIM->FST) { - GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); - - for(size_t i = 0; i < count; i += 2) { - if (write_ba) - v[i].XYZ.X -= 128u; - else - v[i+1].XYZ.X += 128u; - - if (ps_sel.read_ba) - v[i].U -= 128u; - else - v[i+1].U += 128u; - - // Height is too big (2x). 
- int tex_offset = v[i].V & 0xF; - GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); - - GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; - - v[i].XYZ.Y = tmp.x; - v[i].V = tmp.y; - v[i+1].XYZ.Y = tmp.z; - v[i+1].V = tmp.w; - } - } else { - const float offset_8pix = 8.0f / tw; - GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix); - - for(size_t i = 0; i < count; i += 2) { - if (write_ba) - v[i].XYZ.X -= 128u; - else - v[i+1].XYZ.X += 128u; - - if (ps_sel.read_ba) - v[i].ST.S -= offset_8pix; - else - v[i+1].ST.S += offset_8pix; - - // Height is too big (2x). - GSVector4i offset(o.OFY, o.OFY); - - GSVector4i tmp(v[i].XYZ.Y, v[i+1].XYZ.Y); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; - - //fprintf(stderr, "Before %d, After %d\n", v[i+1].XYZ.Y, tmp.y); - v[i].XYZ.Y = tmp.x; - v[i].ST.T /= 2.0f; - v[i+1].XYZ.Y = tmp.y; - v[i+1].ST.T /= 2.0f; - } - } - - // If date is enabled you need to test the green channel instead of the - // alpha channel. Only enable this code in DATE mode to reduce the number - // of shader. - ps_sel.write_rg = !write_ba && m_context->TEST.DATE; - - // Please bang my head against the wall! - // 1/ Reduce the frame mask to a 16 bit format - const uint32& m = m_context->FRAME.FBMSK; - uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 16) & 0x8000); - // FIXME GSVector will be nice here - uint8 rg_mask = fbmask & 0xFF; - uint8 ba_mask = (fbmask >> 8) & 0xFF; - om_csel.wrgba = 0; - - // 2 Select the new mask (Please someone put SSE here) - if (rg_mask != 0xFF) { - if (write_ba) { - GL_INS("Color shuffle %s => B", ps_sel.read_ba ? "B" : "R"); - om_csel.wb = 1; - } else { - GL_INS("Color shuffle %s => R", ps_sel.read_ba ? 
"B" : "R"); - om_csel.wr = 1; - } - if (rg_mask) - ps_sel.fbmask = 1; - } - - if (ba_mask != 0xFF) { - if (write_ba) { - GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G"); - om_csel.wa = 1; - } else { - GL_INS("Color shuffle %s => G", ps_sel.read_ba ? "A" : "G"); - om_csel.wg = 1; - } - if (ba_mask) - ps_sel.fbmask = 1; - } - - if (ps_sel.fbmask && m_sw_blending) { - GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); - ps_cb.FbMask.r = rg_mask; - ps_cb.FbMask.g = rg_mask; - ps_cb.FbMask.b = ba_mask; - ps_cb.FbMask.a = ba_mask; - require_barrier = true; - } else { - ps_sel.fbmask = 0; - } - - } else { - ps_sel.dfmt = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt; - - GSVector4i fbmask_v = GSVector4i::load((int)m_context->FRAME.FBMSK); - int ff_fbmask = fbmask_v.eq8(GSVector4i::xffffffff()).mask(); - int zero_fbmask = fbmask_v.eq8(GSVector4i::zero()).mask(); - - om_csel.wrgba = ~ff_fbmask; // Enable channel if at least 1 bit is 0 - - ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF); - - if (ps_sel.fbmask) { - ps_cb.FbMask = fbmask_v.u8to32(); - // Only alpha is special here, I think we can take a very unsafe shortcut - // Alpha isn't blended on the GS but directly copyied into the RT. - // - // Behavior is clearly undefined however there is a high probability that - // it will work. Masked bit will be constant and normally the same everywhere - // RT/FS output/Cached value. - // - // Just to be sure let's add a new safe hack for unsafe access :) - // - // Here the GL spec quote to emphasize the unexpected behavior. - /* - - If a texel has been written, then in order to safely read the result - a texel fetch must be in a subsequent Draw separated by the command - - void TextureBarrier(void); - - TextureBarrier() will guarantee that writes have completed and caches - have been invalidated before subsequent Draws are executed. 
- */ - if (!(~ff_fbmask & ~zero_fbmask & 0x7) && !UserHacks_safe_fbmask) { - GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, - (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); - m_unsafe_fbmask = true; - require_barrier = false; - } else { - // The safe and accurate path (but slow) - GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, - (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); - require_barrier = true; - } - } - } - - return require_barrier; -} - -bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42) -{ - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - const GIFRegALPHA& ALPHA = m_context->ALPHA; - bool require_barrier = false; - bool sw_blending = false; - - // No blending so early exit - if (!(PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)) { -#ifdef ENABLE_OGL_DEBUG - if (m_env.PABE.PABE) { - GL_INS("!!! ENV PABE without ABE !!!"); - } -#endif - dev->OMSetBlendState(); - return false; - } - - if (m_env.PABE.PABE) - { - GL_INS("!!! ENV PABE not supported !!!"); - if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA) { - ps_sel.pabe = 1; - require_barrier |= (ALPHA.C == 1); - sw_blending = true; - } - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. - //ASSERT(0); - } - - // Compute the blending equation to detect special case - uint8 blend_index = ((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D; - int blend_flag = GSDeviceOGL::m_blendMapOGL[blend_index].bogus; - - // SW Blend is (nearly) free. Let's use it. 
- bool impossible_or_free_blend = (blend_flag & (BLEND_NO_BAR|BLEND_A_MAX|BLEND_ACCU)) - || (m_prim_overlap == PRIM_OVERLAP_NO); - - // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd - bool accumulation_blend = (blend_flag & BLEND_ACCU); - - // Warning no break on purpose - switch (m_sw_blending) { - case ACC_BLEND_ULTRA: sw_blending |= true; - case ACC_BLEND_FULL: if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) GetAlphaMinMax(); - sw_blending |= (ALPHA.A != ALPHA.B) && - ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); - case ACC_BLEND_CCLIP_DALPHA: sw_blending |= (ALPHA.C == 1) || (m_env.COLCLAMP.CLAMP == 0); - // Initial idea was to enable accurate blending for sprite rendering to handle - // correctly post-processing effect. Some games (ZoE) use tons of sprites as particles. - // In order to keep it fast, let's limit it to smaller draw call. - case ACC_BLEND_SPRITE: sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 100; - case ACC_BLEND_FREE: sw_blending |= (ps_sel.fbmask && !m_unsafe_fbmask) || impossible_or_free_blend; // blending is only free when we use slow fbmask - default: sw_blending |= accumulation_blend; - } - // SW Blending - // GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive - // that write the bad alpha value. 
Sw blending will force the draw to run primitive by primitive - // (therefore primitiveID will be constant to 1) - sw_blending &= !DATE_GL42; - - // Color clip - if (m_env.COLCLAMP.CLAMP == 0) { - if (m_prim_overlap == PRIM_OVERLAP_NO) { - // The fastest algo that requires a single pass - GL_INS("COLCLIP Free mode ENABLED"); - ps_sel.colclip = 1; - ASSERT(sw_blending); - accumulation_blend = false; // disable the HDR algo - } else if (accumulation_blend) { - // A fast algo that requires 2 passes - GL_INS("COLCLIP Fast HDR mode ENABLED"); - ps_sel.hdr = 1; - } else if (sw_blending) { - // A slow algo that could requires several passes (barely used) - GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); - ps_sel.colclip = 1; - } else { - // Speed hack skip previous slow algo - GL_INS("Sorry colclip isn't supported"); - } - } - - // Seriously don't expect me to support this kind of crazyness. - // No mix of COLCLIP + accumulation_blend + DATE GL42 - // Neither fbmask and GL42 - ASSERT(!(ps_sel.hdr && DATE_GL42)); - ASSERT(!(ps_sel.fbmask && DATE_GL42)); - - // For stat to optimize accurate option -#if 0 - GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)", - ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); -#endif - if (sw_blending) { - ps_sel.blend_a = ALPHA.A; - ps_sel.blend_b = ALPHA.B; - ps_sel.blend_c = ALPHA.C; - ps_sel.blend_d = ALPHA.D; - - if (accumulation_blend) { - // Keep HW blending to do the addition/subtraction - dev->OMSetBlendState(blend_index); - if (ALPHA.A == 2) { - // The blend unit does a reverse subtraction so it means - // the shader must output a positive value. 
- // Replace 0 - Cs by Cs - 0 - ps_sel.blend_a = ALPHA.B; - ps_sel.blend_b = 2; - } - // Remove the addition/substraction from the SW blending - ps_sel.blend_d = 2; - } else { - // Disable HW blending - dev->OMSetBlendState(); - } - - // Require the fix alpha vlaue - if (ALPHA.C == 2) { - ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f; - } - - // No need to flush for every primitive - require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend; - } else { - ps_sel.clr1 = !!(blend_flag & BLEND_C_CLR); - if (ps_sel.dfmt == 1 && ALPHA.C == 1) { - // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent - int hacked_blend_index = blend_index + 3; // +3 <=> +1 on C - dev->OMSetBlendState(hacked_blend_index, 128, true); - } else { - dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 2)); - } - } - - return require_barrier; -} - -GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() -{ - // Either 1 triangle or 1 line or 3 POINTs - // It is bad for the POINTs but low probability that they overlap - if (m_vertex.next < 4) - return PRIM_OVERLAP_NO; - - if (m_vt.m_primclass != GS_SPRITE_CLASS) - return PRIM_OVERLAP_UNKNOW; // maybe, maybe not - - // Check intersection of sprite primitive only - size_t count = m_vertex.next; - PRIM_OVERLAP overlap = PRIM_OVERLAP_NO; - GSVertex* v = m_vertex.buff; - - m_drawlist.clear(); - size_t i = 0; - while (i < count) { - // In order to speed up comparison a bounding-box is accumulated. It removes a - // loop so code is much faster (check game virtua fighter). Besides it allow to check - // properly the Y order. 
- - // .x = min(v[i].XYZ.X, v[i+1].XYZ.X) - // .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y) - // .z = max(v[i].XYZ.X, v[i+1].XYZ.X) - // .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y) - GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i+1].m[1])).upl16().xzyw(); - all = all.xyxy().blend(all.zwzw(), all > all.zwxy()); - - size_t j = i + 2; - while (j < count) { - GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j+1].m[1])).upl16().xzyw(); - sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy()); - - // Be sure to get vertex in good order, otherwise .r* function doesn't - // work as expected. - ASSERT(sprite.x <= sprite.z); - ASSERT(sprite.y <= sprite.w); - ASSERT(all.x <= all.z); - ASSERT(all.y <= all.w); - - if (all.rintersect(sprite).rempty()) { - all = all.runion_ordered(sprite); - } else { - overlap = PRIM_OVERLAP_YES; - break; - } - j += 2; - } - m_drawlist.push_back((j - i) >> 1); // Sprite count - i = j; - } - -#if 0 - // Old algo: less constraint but O(n^2) instead of O(n) as above - - // You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left - // There is a high probability that the draw call will uses same ordering for all vertices. 
- // In order to keep a small performance impact only the first sprite will be checked - // - // Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact - if (v[1].XYZ.Y < v[0].XYZ.Y) { - // First vertex is Top-Left - for(size_t i = 0; i < count; i += 2) { - if (v[i+1].XYZ.Y > v[i].XYZ.Y) { - return PRIM_OVERLAP_UNKNOW; - } - GSVector4i vi(v[i].XYZ.X, v[i+1].XYZ.Y, v[i+1].XYZ.X, v[i].XYZ.Y); - for (size_t j = i+2; j < count; j += 2) { - GSVector4i vj(v[j].XYZ.X, v[j+1].XYZ.Y, v[j+1].XYZ.X, v[j].XYZ.Y); - GSVector4i inter = vi.rintersect(vj); - if (!inter.rempty()) { - return PRIM_OVERLAP_YES; - } - } - } - } else { - // First vertex is Bottom-Left - for(size_t i = 0; i < count; i += 2) { - if (v[i+1].XYZ.Y < v[i].XYZ.Y) { - return PRIM_OVERLAP_UNKNOW; - } - GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y); - for (size_t j = i+2; j < count; j += 2) { - GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y); - GSVector4i inter = vi.rintersect(vj); - if (!inter.rempty()) { - return PRIM_OVERLAP_YES; - } - } - } - } -#endif - - //fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count); - return overlap; -} - -GSVector4i GSRendererOGL::ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize) -{ - GSVector4 scale = GSVector4(rtscale.x, rtscale.y); - GSVector4 offset = GSVector4(-1.0f, 1.0f); // Round value - GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + offset.xxyy(); - return GSVector4i(box * scale.xyxy()).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y)); -} - -void GSRendererOGL::SendDraw(bool require_barrier) -{ - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - - if (!require_barrier && m_unsafe_fbmask) { - // Not safe but still worth to take some precautions. 
- ASSERT(GLLoader::found_GL_ARB_texture_barrier); - glTextureBarrier(); - dev->DrawIndexedPrimitive(); - } else if (!require_barrier) { - dev->DrawIndexedPrimitive(); - } else if (m_prim_overlap == PRIM_OVERLAP_NO) { - ASSERT(GLLoader::found_GL_ARB_texture_barrier); - glTextureBarrier(); - dev->DrawIndexedPrimitive(); - } else if (m_vt.m_primclass == GS_SPRITE_CLASS) { - size_t nb_vertex = (GLLoader::found_geometry_shader) ? 2 : 6; - - GL_PUSH("Split the draw (SPRITE)"); - -#if defined(_DEBUG) - // Check how draw call is split. - map frequency; - for (const auto& it: m_drawlist) - ++frequency[it]; - - string message; - for (const auto& it: frequency) - message += " " + to_string(it.first) + "(" + to_string(it.second) + ")"; - - GL_PERF("Split single draw (%d sprites) into %zu draws: consecutive draws(frequency):%s", - m_index.tail / nb_vertex, m_drawlist.size(), message.c_str()); -#endif - - for (size_t count, p = 0, n = 0; n < m_drawlist.size(); p += count, ++n) { - count = m_drawlist[n] * nb_vertex; - glTextureBarrier(); - dev->DrawIndexedPrimitive(p, count); - } - - GL_POP(); - } else { - // FIXME: Investigate: a dynamic check to pack as many primitives as possibles - // I'm nearly sure GSdx already have this kind of code (maybe we can adapt GSDirtyRect) - size_t nb_vertex; - switch (m_vt.m_primclass) { - case GS_TRIANGLE_CLASS: nb_vertex = 3; break; - case GS_POINT_CLASS: nb_vertex = 1; break; - default: nb_vertex = 2; break; - } - - GL_PUSH("Split the draw"); - - GL_PERF("Split single draw in %d draw", m_index.tail/nb_vertex); - - for (size_t p = 0; p < m_index.tail; p += nb_vertex) { - glTextureBarrier(); - dev->DrawIndexedPrimitive(p, nb_vertex); - } - - GL_POP(); - } -} - -void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) -{ - GSDeviceOGL::VSSelector vs_sel; - GSDeviceOGL::GSSelector gs_sel; - - GSDeviceOGL::PSSelector ps_sel; - GSDeviceOGL::PSSamplerSelector ps_ssel; - - GSDeviceOGL::OMColorMaskSelector om_csel; - 
GSDeviceOGL::OMDepthStencilSelector om_dssel; - - GL_PUSH("GL Draw from %d in %d (Depth %d)", - tex && tex->m_texture ? tex->m_texture->GetID() : 0, - rt ? rt->GetID() : -1, ds ? ds->GetID() : -1); - - GSTexture* hdr_rt = NULL; - - const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize(); - const GSVector2& rtscale = ds ? ds->GetScale() : rt->GetScale(); - - bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24; - bool DATE_GL42 = false; - bool DATE_GL45 = false; - - bool require_barrier = false; // For accurate option - m_unsafe_fbmask = false; - - ASSERT(m_dev != NULL); - - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - dev->s_n = s_n; - - if ((DATE || m_sw_blending) && GLLoader::found_GL_ARB_texture_barrier && (m_vt.m_primclass == GS_SPRITE_CLASS)) { - // Except 2D games, sprites are often use for special post-processing effect - m_prim_overlap = PrimitiveOverlap(); - } else { - m_prim_overlap = PRIM_OVERLAP_UNKNOW; - } -#ifdef ENABLE_OGL_DEBUG - if (m_sw_blending && (m_prim_overlap != PRIM_OVERLAP_NO) && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && (m_vertex.next > 2)) { - GL_INS("ERROR: Source and Target are the same!"); - } -#endif - - require_barrier |= EmulateTextureShuffleAndFbmask(ps_sel, om_csel); - - // DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending - - if (DATE && GLLoader::found_GL_ARB_texture_barrier) { - if (m_prim_overlap == PRIM_OVERLAP_NO || m_texture_shuffle) { - // It is way too complex to emulate texture shuffle with DATE. So just use - // the slow but accurate algo - require_barrier = true; - DATE_GL45 = true; - DATE = false; - } else if (m_accurate_date && om_csel.wa /* FIXME Check the msb bit of the mask instead + the dfmt*/ - && (!m_context->TEST.ATE || m_context->TEST.ATST == ATST_ALWAYS)) { - // texture barrier will split the draw call into n draw call. It is very efficient for - // few primitive draws. Otherwise it sucks. 
- if (m_index.tail < 100) { - require_barrier = true; - DATE_GL45 = true; - DATE = false; - } else { - DATE_GL42 = GLLoader::found_GL_ARB_shader_image_load_store; - } - } - } - - // Blend - - if (!IsOpaque() && rt) { - require_barrier |= EmulateBlending(ps_sel, DATE_GL42); - } else { - dev->OMSetBlendState(); // No blending please - } - - if (ps_sel.dfmt == 1) { - // Disable writing of the alpha channel - om_csel.wa = 0; - } - - // DATE (setup part) - - if (DATE) { - GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); - - // Reduce the quantity of clean function - glScissor( dRect.x, dRect.y, dRect.width(), dRect.height() ); - GLState::scissor = dRect; - - // Must be done here to avoid any GL state pertubation (clear function...) - // Create an r32ui image that will containt primitive ID - if (DATE_GL42) { - dev->InitPrimDateTexture(rt); - } else { - GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy(); - GSVector4 dst = src * 2.0f - 1.0f; - - GSVertexPT1 vertices[] = - { - {GSVector4(dst.x, dst.y, 0.0f, 0.0f), GSVector2(src.x, src.y)}, - {GSVector4(dst.z, dst.y, 0.0f, 0.0f), GSVector2(src.z, src.y)}, - {GSVector4(dst.x, dst.w, 0.0f, 0.0f), GSVector2(src.x, src.w)}, - {GSVector4(dst.z, dst.w, 0.0f, 0.0f), GSVector2(src.z, src.w)}, - }; - - dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); - } - } - - // - - dev->BeginScene(); - - // om - - if (m_context->TEST.ZTE) - { - om_dssel.ztst = m_context->TEST.ZTST; - om_dssel.zwe = !m_context->ZBUF.ZMSK; - } - else - { - om_dssel.ztst = ZTST_ALWAYS; - } - - // vs - - vs_sel.wildhack = (UserHacks_WildHack && !isPackedUV_HackFlag) ? 1 : 0; - - // The real GS appears to do no masking based on the Z buffer format and writing larger Z values - // than the buffer supports seems to be an error condition on the real GS, causing it to crash. - // We are probably receiving bad coordinates from VU1 in these cases. 
- - if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) - { - if (m_context->ZBUF.PSM == PSM_PSMZ24) - { - if (m_vt.m_max.p.z > 0xffffff) - { - ASSERT(m_vt.m_min.p.z > 0xffffff); - // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. - if (m_vt.m_min.p.z > 0xffffff) - { - GL_INS("Bad Z size on 24 bits buffers") - vs_sel.bppz = 1; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - else if (m_context->ZBUF.PSM == PSM_PSMZ16 || m_context->ZBUF.PSM == PSM_PSMZ16S) - { - if (m_vt.m_max.p.z > 0xffff) - { - ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo - // Fixme : Same as above, I guess. - if (m_vt.m_min.p.z > 0xffff) - { - GL_INS("Bad Z size on 16 bits buffers") - vs_sel.bppz = 2; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - } - - // FIXME Opengl support half pixel center (as dx10). Code could be easier!!! - float sx = 2.0f * rtscale.x / (rtsize.x << 4); - float sy = 2.0f * rtscale.y / (rtsize.y << 4); - float ox = (float)(int)m_context->XYOFFSET.OFX; - float oy = (float)(int)m_context->XYOFFSET.OFY; - float ox2 = -1.0f / rtsize.x; - float oy2 = -1.0f / rtsize.y; - - //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, - //because DX10 and DX9 have a different pixel center.) - // - //The resulting shifted output aligns better with common blending / corona / blurring effects, - //but introduces a few bad pixels on the edges. - - if (rt && rt->LikelyOffset) - { - ox2 *= rt->OffsetHack_modx; - oy2 *= rt->OffsetHack_mody; - } - - // Note: DX does y *= -1.0 - vs_cb.Vertex_Scale_Offset = GSVector4(sx, sy, ox * sx + ox2 + 1, oy * sy + oy2 + 1); - // END of FIXME - - // GS_SPRITE_CLASS are already flat (either by CPU or the GS) - ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 
1 : PRIM->IIP; - - if (DATE_GL45) { - ps_sel.date = 5 + m_context->TEST.DATM; - } else if (DATE) { - if (DATE_GL42) - ps_sel.date = 1 + m_context->TEST.DATM; - else - om_dssel.date = 1; - } - - ps_sel.fba = m_context->FBA.FBA; - - if (PRIM->FGE) - { - ps_sel.fog = 1; - - GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]); -#if _M_SSE >= 0x401 - // Blend AREF to avoid to load a random value for alpha (dirty cache) - ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF); -#else - ps_cb.FogColor_AREF = fc; -#endif - } - - if (m_context->TEST.ATE) - ps_sel.atst = m_context->TEST.ATST; - else - ps_sel.atst = ATST_ALWAYS; - - if (m_context->TEST.ATE && m_context->TEST.ATST > 1) - ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF; - - // By default don't use texture - ps_sel.tfx = 4; - bool spritehack = false; - int atst = ps_sel.atst; - - if (tex) - { - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; - bool bilinear = m_filter == 2 ? 
m_vt.IsLinear() : m_filter != 0; - bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2; - // Don't force extra filtering on sprite (it creates various upscaling issue) - bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); - - ps_sel.wms = m_context->CLAMP.WMS; - ps_sel.wmt = m_context->CLAMP.WMT; - - // Performance note: - // 1/ Don't set 0 as it is the default value - // 2/ Only keep aem when it is useful (avoid useless shader permutation) - if (ps_sel.shuffle) { - // Force a 32 bits access (normally shuffle is done on 16 bits) - // ps_sel.tex_fmt = 0; // removed as an optimization - ps_sel.aem = m_env.TEXA.AEM; - ASSERT(tex->m_target); - - // Shuffle is a 16 bits format, so aem is always required - GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); - ta /= 255.0f; - // FIXME rely on compiler for the optimization - ps_cb.TA_Af.x = ta.x; - ps_cb.TA_Af.y = ta.y; - - // FIXME: it is likely a bad idea to do the bilinear interpolation here - // bilinear &= m_vt.IsLinear(); - - } else if (tex->m_target) { - // Use an old target. AEM and index aren't resolved it must be done - // on the GPU - - // Select the 32/24/16 bits color (AEM) - ps_sel.tex_fmt = cpsm.fmt; - ps_sel.aem = m_env.TEXA.AEM; - - // Don't upload AEM if format is 32 bits - if (cpsm.fmt) { - GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); - ta /= 255.0f; - // FIXME rely on compiler for the optimization - ps_cb.TA_Af.x = ta.x; - ps_cb.TA_Af.y = ta.y; - } - - // Select the index format - if (tex->m_palette) { - // FIXME Potentially improve fmt field in GSLocalMemory - if (m_context->TEX0.PSM == PSM_PSMT4HL) - ps_sel.tex_fmt |= 1 << 2; - else if (m_context->TEX0.PSM == PSM_PSMT4HH) - ps_sel.tex_fmt |= 2 << 2; - else - ps_sel.tex_fmt |= 3 << 2; - - // Alpha channel of the RT is reinterpreted as an index. Star - // Ocean 3 uses it to emulate a stencil buffer. 
It is a very - // bad idea to force bilinear filtering on it. - bilinear &= m_vt.IsLinear(); - } - - } else if (tex->m_palette) { - // Use a standard 8 bits texture. AEM is already done on the CLUT - // Therefore you only need to set the index - // ps_sel.aem = 0; // removed as an optimization - - // Note 4 bits indexes are converted to 8 bits - ps_sel.tex_fmt = 3 << 2; - - } else { - // Standard texture. Both index and AEM expansion were already done by the CPU. - // ps_sel.tex_fmt = 0; // removed as an optimization - // ps_sel.aem = 0; // removed as an optimization - } - - if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { - // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) - ps_sel.tfx = TFX_DECAL; - } else { - ps_sel.tfx = m_context->TEX0.TFX; - } - - ps_sel.tcc = m_context->TEX0.TCC; - - ps_sel.ltf = bilinear && !simple_sample; - spritehack = tex->m_spritehack_t; - - int w = tex->m_texture->GetWidth(); - int h = tex->m_texture->GetHeight(); - - int tw = (int)(1 << m_context->TEX0.TW); - int th = (int)(1 << m_context->TEX0.TH); - - GSVector4 WH(tw, th, w, h); - - ps_sel.fst = !!PRIM->FST; - - ps_cb.WH = WH; - ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - if ((m_context->CLAMP.WMS | m_context->CLAMP.WMT) > 1) { - ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); - ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy(); - } - - // TC Offset Hack - ps_sel.tcoffsethack = !!UserHacks_TCOffset; - ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y) / WH.xyxy(); - - - // Only enable clamping in CLAMP mode. 
REGION_CLAMP will be done manually in the shader - ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP); - ps_ssel.tav = (m_context->CLAMP.WMT != CLAMP_CLAMP); - ps_ssel.ltf = bilinear && simple_sample; - ps_ssel.aniso = simple_sample; - - // Setup Texture ressources - dev->SetupSampler(ps_ssel); - dev->PSSetShaderResources(tex->m_texture, tex->m_palette); - - if (spritehack && (ps_sel.atst == 2)) { - ps_sel.atst = 1; - } - } else { -#ifdef ENABLE_OGL_DEBUG - // Unattach texture to avoid noise in debugger - dev->PSSetShaderResources(NULL, NULL); -#endif - } - // Always bind the RT. This way special effect can use it. - dev->PSSetShaderResource(3, rt); - - - // GS - -#if 0 - if (m_vt.m_primclass == GS_POINT_CLASS) { - // Upscaling point will create aliasing because point has a size of 0 pixels. - // This code tries to replace point with sprite. So a point in 4x will be replaced by - // a 4x4 sprite. - gs_sel.point = 1; - // FIXME this formula is potentially wrong - GSVector4 point_size = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y) * 2.0f; - vs_cb.TextureScale = vs_cb.TextureScale.xyxy(point_size); - } -#endif - gs_sel.sprite = m_vt.m_primclass == GS_SPRITE_CLASS; - - dev->SetupVS(vs_sel); - dev->SetupGS(gs_sel); - dev->SetupPS(ps_sel); - - // rs - - GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * m_context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - - GL_PUSH("IA"); - SetupIA(); - GL_POP(); - - dev->OMSetColorMaskState(om_csel); - dev->SetupOM(om_dssel); - - dev->SetupCB(&vs_cb, &ps_cb); - - if (DATE_GL42) { - GL_PUSH("Date GL42"); - // It could be good idea to use stencil in the same time. - // Early stencil test will reduce the number of atomic-load operation - - // Create an r32i image that will contain primitive ID - // Note: do it at the beginning because the clean will dirty the FBO state - //dev->InitPrimDateTexture(rtsize.x, rtsize.y); - - // I don't know how much is it legal to mount rt as Texture/RT. No write is done. 
- // In doubt let's detach RT. - dev->OMSetRenderTargets(NULL, ds, &scissor); - - // Don't write anything on the color buffer - // Neither in the depth buffer - glDepthMask(false); - // Compute primitiveID max that pass the date test - SendDraw(false); - - // Ask PS to discard shader above the primitiveID max - glDepthMask(GLState::depth_mask); - - ps_sel.date = 3; - dev->SetupPS(ps_sel); - - // Be sure that first pass is finished ! - dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); - - GL_POP(); - } - - if (ps_sel.hdr) { - hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA32F); - - dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); - - dev->OMSetRenderTargets(hdr_rt, ds, &scissor); - } else { - dev->OMSetRenderTargets(rt, ds, &scissor); - } - - if (m_context->TEST.DoFirstPass()) - { - SendDraw(require_barrier); - } - - if (m_context->TEST.DoSecondPass()) - { - ASSERT(!m_env.PABE.PABE); - - static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; - - ps_sel.atst = iatst[atst]; - if (spritehack && (ps_sel.atst == 2)) { - ps_sel.atst = 1; - } - - dev->SetupPS(ps_sel); - - bool z = om_dssel.zwe; - bool r = om_csel.wr; - bool g = om_csel.wg; - bool b = om_csel.wb; - bool a = om_csel.wa; - - switch(m_context->TEST.AFAIL) - { - case AFAIL_KEEP: z = r = g = b = a = false; break; // none - case AFAIL_FB_ONLY: z = false; break; // rgba - case AFAIL_ZB_ONLY: r = g = b = a = false; break; // z - case AFAIL_RGB_ONLY: z = a = false; break; // rgb - default: __assume(0); - } - - if (z || r || g || b || a) - { - om_dssel.zwe = z; - om_csel.wr = r; - om_csel.wg = g; - om_csel.wb = b; - om_csel.wa = a; - - dev->OMSetColorMaskState(om_csel); - dev->SetupOM(om_dssel); - - SendDraw(require_barrier); - } - } - - if (DATE_GL42) { - dev->RecycleDateTexture(); - } - - dev->EndScene(); - - // Warning: EndScene must be called before StretchRect otherwise - // vertices will be overwritten. Trust me you don't want to do that. 
- if (hdr_rt) { - GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize)); - GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); - dev->StretchRect(hdr_rt, sRect, rt, dRect, ShaderConvert_MOD_256, false); - - dev->Recycle(hdr_rt); - } - - GL_POP(); -} diff --git a/plugins/GSdx_legacy/GSRendererOGL.h b/plugins/GSdx_legacy/GSRendererOGL.h deleted file mode 100644 index dd6c7eab60..0000000000 --- a/plugins/GSdx_legacy/GSRendererOGL.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRendererHW.h" - -#include "GSRenderer.h" -#include "GSTextureCacheOGL.h" -#include "GSVertexHW.h" - -class GSRendererOGL final : public GSRendererHW -{ - enum PRIM_OVERLAP { - PRIM_OVERLAP_UNKNOW, - PRIM_OVERLAP_YES, - PRIM_OVERLAP_NO - }; - - enum ACC_BLEND { - ACC_BLEND_NONE = 0, - ACC_BLEND_FREE = 1, - ACC_BLEND_SPRITE = 2, - ACC_BLEND_CCLIP_DALPHA = 3, - ACC_BLEND_FULL = 4, - ACC_BLEND_ULTRA = 5 - }; - - private: - bool m_accurate_date; - int m_sw_blending; - PRIM_OVERLAP m_prim_overlap; - bool m_unsafe_fbmask; - vector m_drawlist; - - unsigned int UserHacks_TCOffset; - float UserHacks_TCO_x, UserHacks_TCO_y; - bool UserHacks_safe_fbmask; - - GSDeviceOGL::VSConstantBuffer vs_cb; - GSDeviceOGL::PSConstantBuffer ps_cb; - - GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize); - - private: - void EmulateGS(); - void SetupIA(); - bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel); - bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42); - - public: - GSRendererOGL(); - virtual ~GSRendererOGL() {}; - - bool CreateDevice(GSDevice* dev); - - void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) final; - - PRIM_OVERLAP PrimitiveOverlap(); - - void SendDraw(bool require_barrier); -}; diff --git a/plugins/GSdx_legacy/GSRendererSW.cpp b/plugins/GSdx_legacy/GSRendererSW.cpp deleted file mode 100644 index 6e7f6bb9e3..0000000000 --- a/plugins/GSdx_legacy/GSRendererSW.cpp +++ /dev/null @@ -1,1675 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSRendererSW.h" - -#define LOG 0 - -static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL; - -const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); - -#if _M_SSE >= 0x501 -const GSVector8 g_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); -#endif - -GSRendererSW::GSRendererSW(int threads) - : m_fzb(NULL) -{ - m_nativeres = true; // ignore ini, sw is always native - - m_tc = new GSTextureCacheSW(this); - - memset(m_texture, 0, sizeof(m_texture)); - - m_rl = GSRasterizerList::Create(threads, &m_perfmon); - - m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); - - for (uint32 i = 0; i < countof(m_fzb_pages); i++) { - m_fzb_pages[i] = 0; - } - for (uint32 i = 0; i < countof(m_tex_pages); i++) { - m_tex_pages[i] = 0; - } - - #define InitCVB(P) \ - m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][0][1] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][1][0] = &GSRendererSW::ConvertVertexBuffer; \ - m_cvb[P][1][1] = &GSRendererSW::ConvertVertexBuffer; \ - - InitCVB(GS_POINT_CLASS); - InitCVB(GS_LINE_CLASS); - InitCVB(GS_TRIANGLE_CLASS); - InitCVB(GS_SPRITE_CLASS); -} - -GSRendererSW::~GSRendererSW() -{ - delete m_tc; - - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - } - - delete m_rl; - - _aligned_free(m_output); -} - -void GSRendererSW::Reset() -{ - Sync(-1); - - m_tc->RemoveAll(); 
- - GSRenderer::Reset(); -} - -void GSRendererSW::VSync(int field) -{ - Sync(0); // IncAge might delete a cached texture in use - - if(0) if(LOG) - { - fprintf(s_fp, "%lld\n", m_perfmon.GetFrame()); - - GSVector4i dr = GetDisplayRect(); - GSVector4i fr = GetFrameRect(); - GSVector2i ds = GetDeviceSize(); - - fprintf(s_fp, "dr %d %d %d %d, fr %d %d %d %d, ds %d %d\n", - dr.x, dr.y, dr.z, dr.w, - fr.x, fr.y, fr.z, fr.w, - ds.x, ds.y); - - for(int i = 0; i < 2; i++) - { - if(i == 0 && !m_regs->PMODE.EN1) continue; - if(i == 1 && !m_regs->PMODE.EN2) continue; - - fprintf(s_fp, "DISPFB[%d] BP=%05x BW=%d PSM=%d DBX=%d DBY=%d\n", - i, - m_regs->DISP[i].DISPFB.Block(), - m_regs->DISP[i].DISPFB.FBW, - m_regs->DISP[i].DISPFB.PSM, - m_regs->DISP[i].DISPFB.DBX, - m_regs->DISP[i].DISPFB.DBY - ); - - fprintf(s_fp, "DISPLAY[%d] DX=%d DY=%d DW=%d DH=%d MAGH=%d MAGV=%d\n", - i, - m_regs->DISP[i].DISPLAY.DX, - m_regs->DISP[i].DISPLAY.DY, - m_regs->DISP[i].DISPLAY.DW, - m_regs->DISP[i].DISPLAY.DH, - m_regs->DISP[i].DISPLAY.MAGH, - m_regs->DISP[i].DISPLAY.MAGV - ); - } - - fprintf(s_fp, "PMODE EN1=%d EN2=%d CRTMD=%d MMOD=%d AMOD=%d SLBG=%d ALP=%d\n", - m_regs->PMODE.EN1, - m_regs->PMODE.EN2, - m_regs->PMODE.CRTMD, - m_regs->PMODE.MMOD, - m_regs->PMODE.AMOD, - m_regs->PMODE.SLBG, - m_regs->PMODE.ALP - ); - - fprintf(s_fp, "SMODE1 CLKSEL=%d CMOD=%d EX=%d GCONT=%d LC=%d NVCK=%d PCK2=%d PEHS=%d PEVS=%d PHS=%d PRST=%d PVS=%d RC=%d SINT=%d SLCK=%d SLCK2=%d SPML=%d T1248=%d VCKSEL=%d VHP=%d XPCK=%d\n", - m_regs->SMODE1.CLKSEL, - m_regs->SMODE1.CMOD, - m_regs->SMODE1.EX, - m_regs->SMODE1.GCONT, - m_regs->SMODE1.LC, - m_regs->SMODE1.NVCK, - m_regs->SMODE1.PCK2, - m_regs->SMODE1.PEHS, - m_regs->SMODE1.PEVS, - m_regs->SMODE1.PHS, - m_regs->SMODE1.PRST, - m_regs->SMODE1.PVS, - m_regs->SMODE1.RC, - m_regs->SMODE1.SINT, - m_regs->SMODE1.SLCK, - m_regs->SMODE1.SLCK2, - m_regs->SMODE1.SPML, - m_regs->SMODE1.T1248, - m_regs->SMODE1.VCKSEL, - m_regs->SMODE1.VHP, - m_regs->SMODE1.XPCK - ); - - 
fprintf(s_fp, "SMODE2 INT=%d FFMD=%d DPMS=%d\n", - m_regs->SMODE2.INT, - m_regs->SMODE2.FFMD, - m_regs->SMODE2.DPMS - ); - - fprintf(s_fp, "SRFSH %08x_%08x\n", - m_regs->SRFSH.u32[0], - m_regs->SRFSH.u32[1] - ); - - fprintf(s_fp, "SYNCH1 %08x_%08x\n", - m_regs->SYNCH1.u32[0], - m_regs->SYNCH1.u32[1] - ); - - fprintf(s_fp, "SYNCH2 %08x_%08x\n", - m_regs->SYNCH2.u32[0], - m_regs->SYNCH2.u32[1] - ); - - fprintf(s_fp, "SYNCV %08x_%08x\n", - m_regs->SYNCV.u32[0], - m_regs->SYNCV.u32[1] - ); - - fprintf(s_fp, "CSR %08x_%08x\n", - m_regs->CSR.u32[0], - m_regs->CSR.u32[1] - ); - - fflush(s_fp); - } - - /* - int draw[8], sum = 0; - - for(size_t i = 0; i < countof(draw); i++) - { - draw[i] = m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); - sum += draw[i]; - } - - printf("CPU %d Sync %d W %d %d %d %d %d %d %d %d (%d)\n", - m_perfmon.CPU(GSPerfMon::Main), - m_perfmon.CPU(GSPerfMon::Sync), - draw[0], draw[1], draw[2], draw[3], draw[4], draw[5], draw[6], draw[7], sum); - - // - */ - - GSRenderer::VSync(field); - - m_tc->IncAge(); - - // if((m_perfmon.GetFrame() & 255) == 0) m_rl->PrintStats(); -} - -void GSRendererSW::ResetDevice() -{ - for(size_t i = 0; i < countof(m_texture); i++) - { - delete m_texture[i]; - - m_texture[i] = NULL; - } -} - -GSTexture* GSRendererSW::GetOutput(int i) -{ - Sync(1); - - const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; - - int w = DISPFB.FBW * 64; - int h = GetFrameRect(i).bottom; - - // TODO: round up bottom - - if(m_dev->ResizeTexture(&m_texture[i], w, h)) - { - static int pitch = 1024 * 4; - - GSVector4i r(0, 0, w, h); - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM]; - - (m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign(psm.bs), m_output, pitch, m_env.TEXA); - - m_texture[i]->Update(r, m_output, pitch); - - if(s_dump) - { - if(s_savef && s_n >= s_saven) - { - m_texture[i]->Save(root_sw + format("%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), 
(int)DISPFB.PSM)); - } - - s_n++; - } - } - - return m_texture[i]; -} - -template -void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) -{ - #if 0//_M_SSE >= 0x501 - - // TODO: something isn't right here, this makes other functions slower (split load/store? old sse code in 3rd party lib?) - - GSVector8i o2((GSVector4i)m_context->XYOFFSET); - GSVector8 tsize2(GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0)); - - for(int i = (int)m_vertex.next; i > 0; i -= 2, src += 2, dst += 2) // ok to overflow, allocator makes sure there is one more dummy vertex - { - GSVector8i v0 = GSVector8i::load(src[0].m); - GSVector8i v1 = GSVector8i::load(src[1].m); - - GSVector8 stcq = GSVector8::cast(v0.ac(v1)); - GSVector8i xyzuvf = v0.bd(v1); - - //GSVector8 stcq = GSVector8::load(&src[0].m[0], &src[1].m[0]); - //GSVector8i xyzuvf = GSVector8i::load(&src[0].m[1], &src[1].m[1]); - - GSVector8i xy = xyzuvf.upl16() - o2; - GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00()); - - GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2; - GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7); - - GSVector8 t = GSVector8::zero(); - - if(tme) - { - if(fst) - { - t = GSVector8(xyzuvf.uph16() << (16 - 4)); - } - else - { - t = stcq.xyww() * tsize2; - } - } - - if(primclass == GS_SPRITE_CLASS) - { - t = t.insert32<1, 3>(GSVector8::cast(xyzuvf)); - } - - GSVector8::storel(&dst[0].p, p); - - if(tme || primclass == GS_SPRITE_CLASS) - { - GSVector8::store(&dst[0].t, t.ac(c)); - } - else - { - GSVector8::storel(&dst[0].c, c); - } - - GSVector8::storeh(&dst[1].p, p); - - if(tme || primclass == GS_SPRITE_CLASS) - { - GSVector8::store(&dst[1].t, t.bd(c)); - } - else - { - GSVector8::storeh(&dst[1].c, c); - } - } - - #else - - GSVector4i off = (GSVector4i)m_context->XYOFFSET; - GSVector4 tsize = GSVector4(0x10000 << 
m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); - - for(int i = (int)m_vertex.next; i > 0; i--, src++, dst++) - { - GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q - - #if _M_SSE >= 0x401 - - GSVector4i xyzuvf(src->m[1]); - - GSVector4i xy = xyzuvf.upl16() - off; - GSVector4i zf = xyzuvf.ywww().min_u32(GSVector4i::xffffff00()); - - #else - - uint32 z = src->XYZ.Z; - - GSVector4i xy = GSVector4i::load((int)src->XYZ.u32[0]).upl16() - off; - GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), src->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later - - #endif - - dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; - dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); - - GSVector4 t = GSVector4::zero(); - - if(tme) - { - if(fst) - { - #if _M_SSE >= 0x401 - - t = GSVector4(xyzuvf.uph16() << (16 - 4)); - - #else - - t = GSVector4(GSVector4i::load(src->UV).upl16() << (16 - 4)); - - #endif - } - else - { - t = stcq.xyww() * tsize; - } - } - - if(primclass == GS_SPRITE_CLASS) - { - #if _M_SSE >= 0x401 - - t = t.insert32<1, 3>(GSVector4::cast(xyzuvf)); - - #else - - t = t.insert32<0, 3>(GSVector4::cast(GSVector4i::load(z))); - - #endif - } - - dst->t = t; - - #if 0 //_M_SSE >= 0x501 - - dst->_pad = GSVector4::zero(); - - #endif - } - - #endif -} - -void GSRendererSW::Draw() -{ - const GSDrawingContext* context = m_context; - - SharedData* sd = new SharedData(this); - - shared_ptr data(sd); - - sd->primclass = m_vt.m_primclass; - sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(uint32) * m_index.tail, 64); - sd->vertex = (GSVertexSW*)sd->buff; - sd->vertex_count = m_vertex.next; - sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1)); - sd->index_count = m_index.tail; - - (this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST])(sd->vertex, m_vertex.buff, 
m_vertex.next); - - memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail); - - GSVector4i scissor = GSVector4i(context->scissor.in); - GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); - - // points and lines may have zero area bbox (single line: 0, 0 - 256, 0) - - if(m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) - { - if(bbox.x == bbox.z) bbox.z++; - if(bbox.y == bbox.w) bbox.w++; - } - - GSVector4i r = bbox.rintersect(scissor); - - scissor.z = std::min(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour - - sd->scissor = scissor; - sd->bbox = bbox; - sd->frame = m_perfmon.GetFrame(); - - if(!GetScanlineGlobalData(sd)) - { - s_n += 3; // Keep it sync with HW renderer - return; - } - - if(0) if(LOG) - { - int n = GSUtil::GetVertexCount(PRIM->PRIM); - - for(uint32 i = 0, j = 0; i < m_index.tail; i += n, j++) - { - for(int k = 0; k < n; k++) - { - GSVertex* v = &m_vertex.buff[m_index.buff[i + k]]; - GSVertex* vn = &m_vertex.buff[m_index.buff[i + n - 1]]; - - fprintf(s_fp, "%d:%d %f %f %f %f\n", - j, k, - (float)(v->XYZ.X - context->XYOFFSET.OFX) / 16, - (float)(v->XYZ.Y - context->XYOFFSET.OFY) / 16, - PRIM->FST ? (float)(v->U) / 16 : v->ST.S / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q), - PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? 
vn->RGBAQ.Q : v->RGBAQ.Q) - ); - } - } - } - - // - - // GSScanlineGlobalData& gd = sd->global; - - uint32* fb_pages = NULL; - uint32* zb_pages = NULL; - - if(sd->global.sel.fb) - { - fb_pages = m_context->offset.fb->GetPages(r); - } - - if(sd->global.sel.zb) - { - zb_pages = m_context->offset.zb->GetPages(r); - } - - // check if there is an overlap between this and previous targets - - if(CheckTargetPages(fb_pages, zb_pages, r)) - { - sd->m_syncpoint = SharedData::SyncTarget; - } - - // check if the texture is not part of a target currently in use - - if(CheckSourcePages(sd)) - { - sd->m_syncpoint = SharedData::SyncSource; - } - - // addref source and target pages - - sd->UsePages(fb_pages, m_context->offset.fb->psm, zb_pages, m_context->offset.zb->psm); - - // - - if(s_dump) - { - Sync(2); - - uint64 frame = m_perfmon.GetFrame(); - // Dump the texture in 32 bits format. It helps to debug texture shuffle effect - // It will breaks the few games that really uses 16 bits RT - bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)); - - string s; - - if(s_n >= s_saven) - { - // Dump Register state - s = format("%05d_context.txt", s_n); - - m_env.Dump(root_sw+s); - m_context->Dump(root_sw+s); - } - - if(s_savet && s_n >= s_saven && PRIM->TME) - { - if (texture_shuffle) { - // Dump the RT in 32 bits format. 
It helps to debug texture shuffle effect - s = format("%05d_f%lld_tex_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0); - m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); - } - - s = format("%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); - } - - s_n++; - - if(s_save && s_n >= s_saven) - { - - if (texture_shuffle) { - // Dump the RT in 32 bits format. It helps to debug texture shuffle effect - s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); - } - - s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(root_sw+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; - - Queue(data); - - Sync(3); - - if(s_save && s_n >= s_saven) - { - if (texture_shuffle) { - // Dump the RT in 32 bits format. 
It helps to debug texture shuffle effect - s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); - } - - s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(root_sw+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; - - if(s_savel > 0 && (s_n - s_saven) > s_savel) - { - s_dump = 0; - } - } - else - { - Queue(data); - } - - /* - if(0)//stats.ticks > 5000000) - { - printf("* [%lld | %012llx] ticks %lld prims %d (%d) pixels %d (%d)\n", - m_perfmon.GetFrame(), gd->sel.key, - stats.ticks, - stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1, - stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1); - } - */ -} - -void GSRendererSW::Queue(shared_ptr& item) -{ - SharedData* sd = (SharedData*)item.get(); - - if(sd->m_syncpoint == SharedData::SyncSource) - { - Sync(4); - } - - // update previously invalidated parts - - sd->UpdateSource(); - - if(sd->m_syncpoint == SharedData::SyncTarget) - { - Sync(5); - } - - if(LOG) - { - GSScanlineGlobalData& gd = ((SharedData*)item.get())->global; - - fprintf(s_fp, "[%d] queue %05x %d (%d) %05x %d (%d) %05x %d %dx%d (%d %d %d) | %d %d %d\n", - sd->counter, - m_context->FRAME.Block(), m_context->FRAME.PSM, gd.sel.fwrite, - m_context->ZBUF.Block(), m_context->ZBUF.PSM, gd.sel.zwrite, - PRIM->TME ? 
m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH, m_context->TEX0.CSM, m_context->TEX0.CPSM, m_context->TEX0.CSA, - PRIM->PRIM, sd->vertex_count, sd->index_count); - - fflush(s_fp); - } - - m_rl->Queue(item); - - // invalidate new parts rendered onto - - if(sd->global.sel.fwrite) - { - m_tc->InvalidatePages(sd->m_fb_pages, sd->m_fpsm); - } - - if(sd->global.sel.zwrite) - { - m_tc->InvalidatePages(sd->m_zb_pages, sd->m_zpsm); - } -} - -void GSRendererSW::Sync(int reason) -{ - //printf("sync %d\n", reason); - - GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); - - uint64 t = __rdtsc(); - - m_rl->Sync(); - - if(0) if(LOG) - { - s_n++; - - std::string s; - - if(s_save) - { - s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); - - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez) - { - s = format("%05d_f%lld_zb1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(root_sw+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - } - - t = __rdtsc() - t; - - int pixels = m_rl->GetPixels(); - - if(LOG) {fprintf(s_fp, "sync n=%d r=%d t=%lld p=%d %c\n", s_n, reason, t, pixels, t > 10000000 ? 
'*' : ' '); fflush(s_fp);} - - m_perfmon.Put(GSPerfMon::Fillrate, pixels); -} - -void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) -{ - if(LOG) {fprintf(s_fp, "w %05x %d %d, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} - - GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); - - off->GetPages(r, m_tmp_pages); - - // check if the changing pages either used as a texture or a target - - if(!m_rl->IsSynced()) - { - for(uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) - { - if(m_fzb_pages[*p] | m_tex_pages[*p]) - { - Sync(6); - - break; - } - } - } - - m_tc->InvalidatePages(m_tmp_pages, off->psm); // if texture update runs on a thread and Sync(5) happens then this must come later -} - -void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) -{ - if(LOG) {fprintf(s_fp, "%s %05x %d %d, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} - - if(!m_rl->IsSynced()) - { - GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); - - off->GetPages(r, m_tmp_pages); - - for(uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) - { - if(m_fzb_pages[*p]) - { - Sync(7); - - break; - } - } - } -} - -void GSRendererSW::UsePages(const uint32* pages, const int type) -{ - for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - switch (type) { - case 0: - ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX); - m_fzb_pages[*p] += 1; - break; - case 1: - ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX); - m_fzb_pages[*p] += 0x10000; - break; - case 2: - ASSERT(m_tex_pages[*p] < USHRT_MAX); - m_tex_pages[*p] += 1; - break; - default:break; - } - } -} - -void GSRendererSW::ReleasePages(const uint32* pages, const int type) -{ - for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - switch (type) { - case 0: - 
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0); - m_fzb_pages[*p] -= 1; - break; - case 1: - ASSERT((m_fzb_pages[*p] >> 16) > 0); - m_fzb_pages[*p] -= 0x10000; - break; - case 2: - ASSERT(m_tex_pages[*p] > 0); - m_tex_pages[*p] -= 1; - break; - default:break; - } - } -} - -bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r) -{ - bool synced = m_rl->IsSynced(); - - bool fb = fb_pages != NULL; - bool zb = zb_pages != NULL; - - bool res = false; - - if(m_fzb != m_context->offset.fzb4) - { - // targets changed, check everything - - m_fzb = m_context->offset.fzb4; - m_fzb_bbox = r; - - if(fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); - if(zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); - - memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages)); - - uint32 used = 0; - - for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) - { - uint32 i = *p; - - uint32 row = i >> 5; - uint32 col = 1 << (i & 31); - - m_fzb_cur_pages[row] |= col; - - used |= m_fzb_pages[i]; - } - - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) - { - uint32 i = *p; - - uint32 row = i >> 5; - uint32 col = 1 << (i & 31); - - m_fzb_cur_pages[row] |= col; - - used |= m_fzb_pages[i]; - } - - if(!synced) - { - if(used) - { - if(LOG) {fprintf(s_fp, "syncpoint 0\n"); fflush(s_fp);} - - res = true; - } - - //if(LOG) {fprintf(s_fp, "no syncpoint *\n"); fflush(s_fp);} - } - } - else - { - // same target, only check new areas and cross-rendering between frame and z-buffer - - GSVector4i bbox = m_fzb_bbox.runion(r); - - bool check = !m_fzb_bbox.eq(bbox); - - m_fzb_bbox = bbox; - - if(check) - { - // drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards) - - if(fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); - if(zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); - - uint32 used = 0; - - for(const uint32* p = fb_pages; *p != GSOffset::EOP; 
p++) - { - uint32 i = *p; - - uint32 row = i >> 5; - uint32 col = 1 << (i & 31); - - if((m_fzb_cur_pages[row] & col) == 0) - { - m_fzb_cur_pages[row] |= col; - - used |= m_fzb_pages[i]; - } - } - - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) - { - uint32 i = *p; - - uint32 row = i >> 5; - uint32 col = 1 << (i & 31); - - if((m_fzb_cur_pages[row] & col) == 0) - { - m_fzb_cur_pages[row] |= col; - - used |= m_fzb_pages[i]; - } - } - - if(!synced) - { - if(used) - { - if(LOG) {fprintf(s_fp, "syncpoint 1\n"); fflush(s_fp);} - - res = true; - } - } - } - - if(!synced) - { - // chross-check frame and z-buffer pages, they cannot overlap with eachother and with previous batches in queue, - // have to be careful when the two buffers are mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300) - - if(fb && !res) - { - for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) - { - if(m_fzb_pages[*p] & 0xffff0000) - { - if(LOG) {fprintf(s_fp, "syncpoint 2\n"); fflush(s_fp);} - - res = true; - - break; - } - } - } - - if(zb && !res) - { - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) - { - if(m_fzb_pages[*p] & 0x0000ffff) - { - if(LOG) {fprintf(s_fp, "syncpoint 3\n"); fflush(s_fp);} - - res = true; - - break; - } - } - } - } - } - - if(!fb && fb_pages != NULL) delete [] fb_pages; - if(!zb && zb_pages != NULL) delete [] zb_pages; - - return res; -} - -bool GSRendererSW::CheckSourcePages(SharedData* sd) -{ - if(!m_rl->IsSynced()) - { - for(size_t i = 0; sd->m_tex[i].t != NULL; i++) - { - sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages); - - uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n; - - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - // TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2) - - if(m_fzb_pages[*p]) // currently being drawn to? 
=> sync - { - return true; - } - } - } - } - - return false; -} - -#include "GSTextureSW.h" - -bool GSRendererSW::GetScanlineGlobalData(SharedData* data) -{ - GSScanlineGlobalData& gd = data->global; - - const GSDrawingEnvironment& env = m_env; - const GSDrawingContext* context = m_context; - const GS_PRIM_CLASS primclass = m_vt.m_primclass; - - gd.vm = m_mem.m_vm8; - - gd.fbr = context->offset.fb->pixel.row; - gd.zbr = context->offset.zb->pixel.row; - gd.fbc = context->offset.fb->pixel.col[0]; - gd.zbc = context->offset.zb->pixel.col[0]; - gd.fzbr = context->offset.fzb4->row; - gd.fzbc = context->offset.fzb4->col; - - gd.sel.key = 0; - - gd.sel.fpsm = 3; - gd.sel.zpsm = 3; - gd.sel.atst = ATST_ALWAYS; - gd.sel.tfx = TFX_NONE; - gd.sel.ababcd = 0xff; - gd.sel.prim = primclass; - - uint32 fm = context->FRAME.FBMSK; - uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; - - if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) - { - fm = 0xffffffff; - zm = 0xffffffff; - } - - if(PRIM->TME) - { - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - } - } - - if(context->TEST.ATE) - { - if(!TryAlphaTest(fm, zm)) - { - gd.sel.atst = context->TEST.ATST; - gd.sel.afail = context->TEST.AFAIL; - - gd.aref = GSVector4i((int)context->TEST.AREF); - - switch(gd.sel.atst) - { - case ATST_LESS: - gd.sel.atst = ATST_LEQUAL; - gd.aref -= GSVector4i::x00000001(); - break; - case ATST_GREATER: - gd.sel.atst = ATST_GEQUAL; - gd.aref += GSVector4i::x00000001(); - break; - } - } - } - - bool fwrite = fm != 0xffffffff; - bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - - bool zwrite = zm != 0xffffffff; - bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS; - /* - printf("%05x %d %05x %d %05x %d %dx%d\n", - fwrite || ftest ? m_context->FRAME.Block() : 0xfffff, m_context->FRAME.PSM, - zwrite || ztest ? 
m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM, - PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH); - */ - if(!fwrite && !zwrite) return false; - - gd.sel.fwrite = fwrite; - gd.sel.ftest = ftest; - - if(fwrite || ftest) - { - gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; - - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) - { - gd.sel.iip = PRIM->IIP; - } - - if(PRIM->TME) - { - gd.sel.tfx = context->TEX0.TFX; - gd.sel.tcc = context->TEX0.TCC; - gd.sel.fst = PRIM->FST; - gd.sel.ltf = m_vt.IsLinear(); - - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) - { - gd.sel.tlu = 1; - - gd.clut = (uint32*)_aligned_malloc(sizeof(uint32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats - - memcpy(gd.clut, (const uint32*)m_mem.m_clut, sizeof(uint32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal); - } - - gd.sel.wms = context->CLAMP.WMS; - gd.sel.wmt = context->CLAMP.WMT; - - if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) - { - // modulate does not do anything when vertex color is 0x80 - - gd.sel.tfx = TFX_DECAL; - } - - bool mipmap = IsMipMapActive(); - - GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); - - GSVector4i r; - - GetTextureMinMax(r, TEX0, context->CLAMP, gd.sel.ltf); - - GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); - - if(t == NULL) {ASSERT(0); return false;} - - data->SetSource(t, r, 0); - - gd.sel.tw = t->m_tw - 3; - - if(mipmap) - { - // TEX1.MMIN - // 000 p - // 001 l - // 010 p round - // 011 p tri - // 100 l round - // 101 l tri - - if(m_vt.m_lod.x > 0) - { - gd.sel.ltf = context->TEX1.MMIN >> 2; - } - else - { - // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0 - } - - gd.sel.mmin = 
(context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri - gd.sel.lcm = context->TEX1.LCM; - - int mxl = std::min((int)context->TEX1.MXL, 6) << 16; - int k = context->TEX1.K << 12; - - if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) - { - k = (int)m_vt.m_lod.x << 16; // set lod to max level - - gd.sel.lcm = 1; // lod is constant - gd.sel.mmin = 1; // tri-linear is meaningless - } - - if(gd.sel.mmin == 2) - { - mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?) - } - - if(gd.sel.fst) - { - ASSERT(gd.sel.lcm == 1); - ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) - - gd.sel.lcm = 1; - } - - if(gd.sel.lcm) - { - int lod = std::max(std::min(k, mxl), 0); - - if(gd.sel.mmin == 1) - { - lod = (lod + 0x8000) & 0xffff0000; // rounding - } - - gd.lod.i = GSVector4i(lod >> 16); - gd.lod.f = GSVector4i(lod & 0xffff).xxxxl().xxzz(); - - // TODO: lot to optimize when lod is constant - } - else - { - gd.mxl = GSVector4((float)mxl); - gd.l = GSVector4((float)(-0x10000 << context->TEX1.L)); - gd.k = GSVector4((float)k); - } - - GIFRegTEX0 MIP_TEX0 = TEX0; - GIFRegCLAMP MIP_CLAMP = context->CLAMP; - - GSVector4 tmin = m_vt.m_min.t; - GSVector4 tmax = m_vt.m_max.t; - - static int s_counter = 0; - - for(int i = 1, j = std::min((int)context->TEX1.MXL, 6); i <= j; i++) - { - switch(i) - { - case 1: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP1; - MIP_TEX0.TBW = context->MIPTBP1.TBW1; - break; - case 2: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP2; - MIP_TEX0.TBW = context->MIPTBP1.TBW2; - break; - case 3: - MIP_TEX0.TBP0 = context->MIPTBP1.TBP3; - MIP_TEX0.TBW = context->MIPTBP1.TBW3; - break; - case 4: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP4; - MIP_TEX0.TBW = context->MIPTBP2.TBW4; - break; - case 5: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP5; - MIP_TEX0.TBW = context->MIPTBP2.TBW5; - break; - case 6: - MIP_TEX0.TBP0 = context->MIPTBP2.TBP6; - MIP_TEX0.TBW = context->MIPTBP2.TBW6; - break; - default: - __assume(0); - 
} - - if(MIP_TEX0.TW > 0) MIP_TEX0.TW--; - if(MIP_TEX0.TH > 0) MIP_TEX0.TH--; - - MIP_CLAMP.MINU >>= 1; - MIP_CLAMP.MINV >>= 1; - MIP_CLAMP.MAXU >>= 1; - MIP_CLAMP.MAXV >>= 1; - - m_vt.m_min.t *= 0.5f; - m_vt.m_max.t *= 0.5f; - - GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); - - if(t == NULL) {ASSERT(0); return false;} - - GSVector4i r; - - GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf); - - data->SetSource(t, r, i); - } - - s_counter++; - - m_vt.m_min.t = tmin; - m_vt.m_max.t = tmax; - } - else - { - if(gd.sel.fst == 0) - { - // skip per pixel division if q is constant - - GSVertexSW* RESTRICT v = data->vertex; - - if(m_vt.m_eq.q) - { - gd.sel.fst = 1; - - const GSVector4& t = v[data->index[0]].t; - - if(t.z != 1.0f) - { - GSVector4 w = t.zzzz().rcpnr(); - - for(int i = 0, j = data->vertex_count; i < j; i++) - { - GSVector4 t = v[i].t; - - v[i].t = (t * w).xyzw(t); - } - } - } - else if(primclass == GS_SPRITE_CLASS) - { - gd.sel.fst = 1; - - for(int i = 0, j = data->vertex_count; i < j; i += 2) - { - GSVector4 t0 = v[i + 0].t; - GSVector4 t1 = v[i + 1].t; - - GSVector4 w = t1.zzzz().rcpnr(); - - v[i + 0].t = (t0 * w).xyzw(t0); - v[i + 1].t = (t1 * w).xyzw(t1); - } - } - } - - if(gd.sel.ltf && gd.sel.fst) - { - // if q is constant we can do the half pel shift for bilinear sampling on the vertices - - // TODO: but not when mipmapping is used!!! 
- - GSVector4 half(0x8000, 0x8000); - - GSVertexSW* RESTRICT v = data->vertex; - - for(int i = 0, j = data->vertex_count; i < j; i++) - { - GSVector4 t = v[i].t; - - v[i].t = (t - half).xyzw(t); - } - } - } - - uint16 tw = 1u << TEX0.TW; - uint16 th = 1u << TEX0.TH; - - switch(context->CLAMP.WMS) - { - case CLAMP_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0; - gd.t.mask.u32[0] = 0xffffffff; - break; - case CLAMP_CLAMP: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1; - gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_CLAMP: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min(context->CLAMP.MINU, tw - 1); - gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min(context->CLAMP.MAXU, tw - 1); - gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1); - gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1); - gd.t.mask.u32[0] = 0xffffffff; - break; - default: - __assume(0); - } - - switch(context->CLAMP.WMT) - { - case CLAMP_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0; - gd.t.mask.u32[2] = 0xffffffff; - break; - case CLAMP_CLAMP: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1; - gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_CLAMP: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min(context->CLAMP.MINV, th - 1); - gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) - gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 - gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1); - gd.t.mask.u32[2] = 0xffffffff; - break; - default: - __assume(0); - 
} - - gd.t.min = gd.t.min.xxxxlh(); - gd.t.max = gd.t.max.xxxxlh(); - gd.t.mask = gd.t.mask.xxzz(); - gd.t.invmask = ~gd.t.mask; - } - - if(PRIM->FGE) - { - gd.sel.fge = 1; - - gd.frb = env.FOGCOL.u32[0] & 0x00ff00ff; - gd.fga = (env.FOGCOL.u32[0] >> 8) & 0x00ff00ff; - } - - if(context->FRAME.PSM != PSM_PSMCT24) - { - gd.sel.date = context->TEST.DATE; - gd.sel.datm = context->TEST.DATM; - } - - if(!IsOpaque()) - { - gd.sel.abe = PRIM->ABE; - gd.sel.ababcd = context->ALPHA.u32[0]; - - if(env.PABE.PABE) - { - gd.sel.pabe = 1; - } - - if(m_aa1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) - { - gd.sel.aa1 = 1; - } - - gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh(); - } - - if(gd.sel.date - || gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1 - || gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY - || gd.sel.fpsm == 0 && fm != 0 && fm != 0xffffffff - || gd.sel.fpsm == 1 && (fm & 0x00ffffff) != 0 && (fm & 0x00ffffff) != 0x00ffffff - || gd.sel.fpsm == 2 && (fm & 0x80f8f8f8) != 0 && (fm & 0x80f8f8f8) != 0x80f8f8f8) - { - gd.sel.rfb = 1; - } - - gd.sel.colclamp = env.COLCLAMP.CLAMP; - gd.sel.fba = context->FBA.FBA; - - if(env.DTHE.DTHE) - { - gd.sel.dthe = 1; - - gd.dimx = (GSVector4i*)_aligned_malloc(sizeof(env.dimx), 32); - - memcpy(gd.dimx, env.dimx, sizeof(env.dimx)); - } - } - - gd.sel.zwrite = zwrite; - gd.sel.ztest = ztest; - - if(zwrite || ztest) - { - gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt; - gd.sel.ztst = ztest ? 
context->TEST.ZTST : ZTST_ALWAYS; - gd.sel.zoverflow = (uint32)GSVector4i(m_vt.m_max.p).z == 0x80000000U; - } - - #if _M_SSE >= 0x501 - - gd.fm = fm; - gd.zm = zm; - - if(gd.sel.fpsm == 1) - { - gd.fm |= 0xff000000; - } - else if(gd.sel.fpsm == 2) - { - uint32 rb = gd.fm & 0x00f800f8; - uint32 ga = gd.fm & 0x8000f800; - - gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000; - } - - if(gd.sel.zpsm == 1) - { - gd.zm |= 0xff000000; - } - else if(gd.sel.zpsm == 2) - { - gd.zm |= 0xffff0000; - } - - #else - - gd.fm = GSVector4i(fm); - gd.zm = GSVector4i(zm); - - if(gd.sel.fpsm == 1) - { - gd.fm |= GSVector4i::xff000000(); - } - else if(gd.sel.fpsm == 2) - { - GSVector4i rb = gd.fm & 0x00f800f8; - GSVector4i ga = gd.fm & 0x8000f800; - - gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000(); - } - - if(gd.sel.zpsm == 1) - { - gd.zm |= GSVector4i::xff000000(); - } - else if(gd.sel.zpsm == 2) - { - gd.zm |= GSVector4i::xffff0000(); - } - - #endif - - if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor))) // TODO: check scissor horizontally only - { - gd.sel.notest = 1; - - uint32 ofx = context->XYOFFSET.OFX; - - for(int i = 0, j = m_vertex.tail; i < j; i++) - { - #if _M_SSE >= 0x501 - if((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 - #else - if((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 - #endif - { - gd.sel.notest = 0; - - break; - } - } - } - - return true; -} - -GSRendererSW::SharedData::SharedData(GSRendererSW* parent) - : m_parent(parent) - , m_fb_pages(NULL) - , m_zb_pages(NULL) - , m_fpsm(0) - , m_zpsm(0) - , m_using_pages(false) - , m_syncpoint(SyncNone) -{ - m_tex[0].t = NULL; - - global.sel.key = 0; - - global.clut = NULL; - global.dimx = NULL; -} - -GSRendererSW::SharedData::~SharedData() -{ - ReleasePages(); - - if(global.clut) _aligned_free(global.clut); - if(global.dimx) _aligned_free(global.dimx); - - 
if(LOG) {fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n", - counter, - __rdtsc() - start, pixels, - primclass, vertex_count, index_count, - global.sel.hi, global.sel.lo - ); - fflush(s_fp);} -} - -//static TransactionScope::Lock s_lock; - -void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm) -{ - if(m_using_pages) return; - - { - //TransactionScope scope(s_lock); - - if(global.sel.fb && fb_pages != NULL) - { - m_parent->UsePages(fb_pages, 0); - } - - if(global.sel.zb && zb_pages != NULL) - { - m_parent->UsePages(zb_pages, 1); - } - - for(size_t i = 0; m_tex[i].t != NULL; i++) - { - m_parent->UsePages(m_tex[i].t->m_pages.n, 2); - } - } - - m_fb_pages = fb_pages; - m_zb_pages = zb_pages; - m_fpsm = fpsm; - m_zpsm = zpsm; - - m_using_pages = true; -} - -void GSRendererSW::SharedData::ReleasePages() -{ - if(!m_using_pages) return; - - { - //TransactionScope scope(s_lock); - - if(global.sel.fb) - { - m_parent->ReleasePages(m_fb_pages, 0); - } - - if(global.sel.zb) - { - m_parent->ReleasePages(m_zb_pages, 1); - } - - for(size_t i = 0; m_tex[i].t != NULL; i++) - { - m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2); - } - } - - delete [] m_fb_pages; - delete [] m_zb_pages; - - m_fb_pages = NULL; - m_zb_pages = NULL; - - m_using_pages = false; -} - -void GSRendererSW::SharedData::SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level) -{ - ASSERT(m_tex[level].t == NULL); - - m_tex[level].t = t; - m_tex[level].r = r; - - m_tex[level + 1].t = NULL; -} - -void GSRendererSW::SharedData::UpdateSource() -{ - for(size_t i = 0; m_tex[i].t != NULL; i++) - { - if(m_tex[i].t->Update(m_tex[i].r)) - { - global.tex[i] = m_tex[i].t->m_buff; - } - else - { - printf("GSdx: out-of-memory, texturing temporarily disabled\n"); - - global.sel.tfx = TFX_NONE; - } - } - - // TODO - - if(m_parent->s_dump) - { - uint64 frame = m_parent->m_perfmon.GetFrame(); - - string s; - - if(m_parent->s_savet && m_parent->s_n 
>= m_parent->s_saven) - { - for(size_t i = 0; m_tex[i].t != NULL; i++) - { - s = format("%05d_f%lld_tex%d_%05x_%d.bmp", m_parent->s_n - 2, frame, i, (int)m_parent->m_context->TEX0.TBP0, (int)m_parent->m_context->TEX0.PSM); - - m_tex[i].t->Save(root_sw+s); - } - - if(global.clut != NULL) - { - GSTextureSW* t = new GSTextureSW(0, 256, 1); - - t->Update(GSVector4i(0, 0, 256, 1), global.clut, sizeof(uint32) * 256); - - s = format("%05d_f%lld_texp_%05x_%d.bmp", m_parent->s_n - 2, frame, (int)m_parent->m_context->TEX0.TBP0, (int)m_parent->m_context->TEX0.PSM); - - t->Save(root_sw+s); - - delete t; - } - } - } -} diff --git a/plugins/GSdx_legacy/GSRendererSW.h b/plugins/GSdx_legacy/GSRendererSW.h deleted file mode 100644 index b7b66c145c..0000000000 --- a/plugins/GSdx_legacy/GSRendererSW.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" -#include "GSTextureCacheSW.h" -#include "GSDrawScanline.h" - -class GSRendererSW : public GSRenderer -{ - class SharedData : public GSDrawScanline::SharedData - { - __aligned(struct, 16) TextureLevel - { - GSVector4i r; - GSTextureCacheSW::Texture* t; - }; - - public: - GSRendererSW* m_parent; - const uint32* m_fb_pages; - const uint32* m_zb_pages; - int m_fpsm; - int m_zpsm; - bool m_using_pages; - TextureLevel m_tex[7 + 1]; // NULL terminated - enum {SyncNone, SyncSource, SyncTarget} m_syncpoint; - - public: - SharedData(GSRendererSW* parent); - virtual ~SharedData(); - - void UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm); - void ReleasePages(); - - void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level); - void UpdateSource(); - }; - - typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - - ConvertVertexBufferPtr m_cvb[4][2][2]; - - template - void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); - -protected: - IRasterizer* m_rl; - GSTextureCacheSW* m_tc; - GSTexture* m_texture[2]; - uint8* m_output; - GSPixelOffset4* m_fzb; - GSVector4i m_fzb_bbox; - uint32 m_fzb_cur_pages[16]; - std::atomic m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved - std::atomic m_tex_pages[512]; - uint32 m_tmp_pages[512 + 1]; - - void Reset(); - void VSync(int field); - void ResetDevice(); - GSTexture* GetOutput(int i); - - void Draw(); - void Queue(shared_ptr& item); - void Sync(int reason); - void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - - void UsePages(const uint32* pages, const int type); - void ReleasePages(const uint32* pages, const int type); - - bool CheckTargetPages(const 
uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r); - bool CheckSourcePages(SharedData* sd); - - bool GetScanlineGlobalData(SharedData* data); - -public: - GSRendererSW(int threads); - virtual ~GSRendererSW(); -}; diff --git a/plugins/GSdx_legacy/GSScanlineEnvironment.h b/plugins/GSdx_legacy/GSScanlineEnvironment.h deleted file mode 100644 index cc71026b72..0000000000 --- a/plugins/GSdx_legacy/GSScanlineEnvironment.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSLocalMemory.h" -#include "GSVector.h" - -union GSScanlineSelector -{ - struct - { - uint32 fpsm:2; // 0 - uint32 zpsm:2; // 2 - uint32 ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g)) - uint32 atst:3; // 6 - uint32 afail:2; // 9 - uint32 iip:1; // 11 - uint32 tfx:3; // 12 - uint32 tcc:1; // 15 - uint32 fst:1; // 16 - uint32 ltf:1; // 17 - uint32 tlu:1; // 18 - uint32 fge:1; // 19 - uint32 date:1; // 20 - uint32 abe:1; // 21 - uint32 aba:2; // 22 - uint32 abb:2; // 24 - uint32 abc:2; // 26 - uint32 abd:2; // 28 - uint32 pabe:1; // 30 - uint32 aa1:1; // 31 - - uint32 fwrite:1; // 32 - uint32 ftest:1; // 33 - uint32 rfb:1; // 34 - uint32 zwrite:1; // 35 - uint32 ztest:1; // 36 - uint32 zoverflow:1; // 37 (z max >= 0x80000000) - uint32 wms:2; // 38 - uint32 wmt:2; // 40 - uint32 datm:1; // 42 - uint32 colclamp:1; // 43 - uint32 fba:1; // 44 - uint32 dthe:1; // 45 - uint32 prim:2; // 46 - - uint32 edge:1; // 48 - uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) - uint32 lcm:1; // 52 - uint32 mmin:2; // 53 - uint32 notest:1; // 54 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels) - // TODO: 1D texture flag? 
could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction - }; - - struct - { - uint32 _pad1:22; - uint32 ababcd:8; - uint32 _pad2:2; - uint32 fb:2; - uint32 _pad3:1; - uint32 zb:2; - }; - - struct - { - uint32 lo; - uint32 hi; - }; - - uint64 key; - - operator uint32() const {return lo;} - operator uint64() const {return key;} - - bool IsSolidRect() const - { - return prim == GS_SPRITE_CLASS - && iip == 0 - && tfx == TFX_NONE - && abe == 0 - && ztst <= 1 - && atst <= 1 - && date == 0 - && fge == 0; - } -}; - -__aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like a pixel shader constant buffer -{ - GSScanlineSelector sel; - - // - the data of vm, tex may change, multi-threaded drawing must be finished before that happens, clut and dimx are copies - // - tex is a cached texture, it may be recycled to free up memory, its absolute address cannot be compiled into code - // - row and column pointers are allocated once and never change or freed, thier address can be used directly - - void* vm; - const void* tex[7]; - uint32* clut; - GSVector4i* dimx; - - const int* fbr; - const int* zbr; - const int* fbc; - const int* zbc; - const GSVector2i* fzbr; - const GSVector2i* fzbc; - - GSVector4i aref; - GSVector4i afix; - struct {GSVector4i min, max, minmax, mask, invmask;} t; // [u] x 4 [v] x 4 - - #if _M_SSE >= 0x501 - - uint32 fm, zm; - uint32 frb, fga; - GSVector8 mxl; - GSVector8 k; // TEX1.K * 0x10000 - GSVector8 l; // TEX1.L * -0x10000 - struct {GSVector8i i, f;} lod; // lcm == 1 - - #else - - GSVector4i fm, zm; - GSVector4i frb, fga; - GSVector4 mxl; - GSVector4 k; // TEX1.K * 0x10000 - GSVector4 l; // TEX1.L * -0x10000 - struct {GSVector4i i, f;} lod; // lcm == 1 - - #endif -}; - -__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own -{ - #if _M_SSE >= 0x501 - - struct skip {GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad;} d[8]; - struct step 
{GSVector4 stq; struct {uint32 rb, ga;} c; struct {uint32 z, f;} p;} d8; - struct {GSVector8i rb, ga;} c; - struct {uint32 z, f;} p; - - // these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack) - - struct - { - GSVector8 z, zo; - GSVector8i f; - GSVector8 s, t, q; - GSVector8i rb, ga; - GSVector8i zs, zd; - GSVector8i uf, vf; - GSVector8i cov; - - // mipmapping - - struct {GSVector8i i, f;} lod; - GSVector8i uv[2]; - GSVector8i uv_minmax[2]; - GSVector8i trb, tga; - GSVector8i test; - } temp; - - #else - - struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4]; - struct step {GSVector4 z, stq; GSVector4i c, f;} d4; - struct {GSVector4i rb, ga;} c; - struct {GSVector4i z, f;} p; - - // these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack) - - struct - { - GSVector4 z, zo; - GSVector4i f; - GSVector4 s, t, q; - GSVector4i rb, ga; - GSVector4i zs, zd; - GSVector4i uf, vf; - GSVector4i cov; - - // mipmapping - - struct {GSVector4i i, f;} lod; - GSVector4i uv[2]; - GSVector4i uv_minmax[2]; - GSVector4i trb, tga; - GSVector4i test; - } temp; - - #endif - - // - - const GSScanlineGlobalData* gd; -}; diff --git a/plugins/GSdx_legacy/GSSetting.cpp b/plugins/GSdx_legacy/GSSetting.cpp deleted file mode 100644 index 74c54eb411..0000000000 --- a/plugins/GSdx_legacy/GSSetting.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2007-2015 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetting.h" -#ifndef __linux__ -#include "resource.h" -#endif - -const char* dialog_message(int ID, bool* updateText) { - if (updateText) - *updateText = true; - switch (ID) - { - case IDC_FILTER: - return "Control the texture bilinear filtering of the emulation.\n\n" - "Nearest:\nAlways disable interpolation, rendering will be blocky.\n\n" - "PS2:\nUse same mode as the PS2. It is the more accurate option.\n\n" - "Forced:\nAlways enable interpolation. Rendering is smoother but it could generate some glitches."; - case IDC_CRC_LEVEL: - return "Control the number of Auto-CRC hacks applied to games.\n\n" - "None:\nRemove nearly all CRC hacks (debug only).\n\n" - "Minimum:\nEnable a couple of CRC hacks (23).\n\n" - "Partial:\nEnable most of the CRC hacks.\nRecommended OpenGL setting (Accurate/depth options may be required).\n\n" - "Full:\nEnable all CRC hacks.\nRecommended Direct3D setting.\n\n" - "Aggressive:\nUse more aggressive CRC hacks. Only affects a few games, removing some effects which might make the image sharper/clearer.\n" - "Affected games: FFX, FFX2, FFXII, GOW2, ICO, SoTC, SSX3, SMT3, SMTDDS1, SMTDDS2.\n" - "Works as a speedhack for: Steambot Chronicles."; - case IDC_SKIPDRAWHACK: - case IDC_SKIPDRAWHACKEDIT: - return "Skips drawing n surfaces completely. " - "Use it, for example, to try and get rid of bad post processing effects." 
- " Try values between 1 and 100."; - case IDC_ALPHAHACK: - return "Different alpha handling. Can work around some shadow problems."; - case IDC_OFFSETHACK: - return "Might fix some misaligned fog, bloom, or blend effect."; - case IDC_SPRITEHACK: - return "Helps getting rid of black inner lines in some filtered sprites." - " Half option is the preferred one. Use it for Mana Khemia or Ar tonelico for example." - " Full can be used for Tales of Destiny."; - case IDC_WILDHACK: - return "Lowers the GS precision to avoid gaps between pixels when upscaling. Fixes the text on Wild Arms games."; - case IDC_MSAACB: - return "Enables hardware Anti-Aliasing. Needs lots of memory." - " The Z-24 modes might need to have LogarithmicZ to compensate for the bits lost (only in DX9 mode).\n\n" - " MSAA is not implemented on the OpenGL renderer."; - case IDC_ALPHASTENCIL: - return "Extend stencil based emulation of destination alpha to perform stencil operations while drawing.\n\n" - "Improves many shadows which are normally overdrawn in parts, may affect other effects.\n" - "Will disable partial transparency in some games or even prevent drawing some elements altogether."; - case IDC_CHECK_DISABLE_ALL_HACKS: - return "FOR TESTING ONLY!!\n\n" - "Disable all CRC hacks - will break many games. Overrides CrcHacksExclusion at gsdx.ini\n" - "\n" - "It's possible to exclude CRC hacks also via the gsdx.ini. 
E.g.:\n" - "CrcHacksExclusions=all\n" - "CrcHacksExclusions=0x0F0C4A9C, 0x0EE5646B, 0x7ACF7E03"; - case IDC_ALIGN_SPRITE: - return "Fixes issues with upscaling(vertical lines) in Namco games like Ace Combat, Tekken, Soul Calibur, etc."; - case IDC_ROUND_SPRITE: - return "Corrects the sampling of 2D sprite textures when upscaling.\n\n" - "Fixes lines in sprites of games like Ar tonelico when upscaling.\n\n" - "Half option is for flat sprites, Full is for all sprites."; - case IDC_TCOFFSETX: - case IDC_TCOFFSETX2: - case IDC_TCOFFSETY: - case IDC_TCOFFSETY2: - return "Offset for the ST/UV texture coordinates. Fixes some odd texture issues and might fix some post processing alignment too.\n\n" - " 0500 0500, fixes Persona 3 minimap, helps Haunting Ground.\n" - " 0000 1000, fixes Xenosaga hair edges (DX10+ Issue)"; - case IDC_PALTEX: - return "When checked 4/8 bits texture will be send to the GPU with a palette. GPU will be in charge of the conversion.\n\n" - "When unchecked the CPU will convert directly the texture to 32 bits.\n\n" - "It is basically a trade-off between GPU/CPU."; - case IDC_ACCURATE_DATE: - return "Implement a more accurate algorithm to compute GS destination alpha testing.\n\n" - "It could be slower when the effects are used.\n\nNote: it requires the OpenGL 4.2 extension GL_ARB_shader_image_load_store."; - case IDC_ACCURATE_BLEND_UNIT: - return "Control the accuracy level of the GS blending unit emulation. Note: it requires OpenGL 4.5 driver support.\n\n" - "None:\nFast but introduce various rendering issues. It is intended for slow computer.\n\n" - "Basic:\nEmulate correctly most of the effects with a limited speed penalty. It is the recommended setting.\n\n" - "Medium:\nExtend it to all sprites. Performance impact remains reasonable in 3D game.\n\n" - "High:\nExtend it to destination alpha blending and color wrapping. (help shadow and fog effect). 
A good CPU is required.\n\n" - "Full:\nExcept few cases, the blending unit will be fully emulated by the shader. It is ultra slow! It is intended for debug.\n\n" - "Ultra:\nThe blending unit will be completely emulated by the shader. It is ultra slow! It is intended for debug."; - case IDC_SAFE_FBMASK: - return "By default, accurate blending relies on undefined hardware behavior to be fast.\nThis option enables a slower but safer behavior if anyone encounters an issue.\n"; - case IDC_TC_DEPTH: - return "Allows the conversion of Depth buffer from/to Color buffer. It is used for blur & depth of field effects"; - case IDC_AFCOMBO: - return "Reduces texture aliasing at extreme viewing angles. High performance impact."; - case IDC_AA1: - return "Internal GS feature. Reduces edge aliasing of lines and triangles when the game requests it."; - case IDC_SWTHREADS: - case IDC_SWTHREADS_EDIT: - return "Number of rendering threads: 0 for single thread, 2 or more for multithread (1 is for debugging)"; - case IDC_SHADEBOOST: - return "Allows brightness, contrast and saturation to be manually adjusted."; - case IDC_SHADER_FX: - return "Enables external shader for additional post-processing effects."; - case IDC_FXAA: - return "Enables fast approximate anti-aliasing. Small performance impact."; -#ifdef _WIN32 - // DX9 only - case IDC_FBA: - return "Makes textures partially or fully transparent as required by emulation. May cause unusual slowdowns for some games."; - case IDC_LOGZ: - return "Treat depth as logarithmic instead of linear. Recommended setting is on unless it causes graphical glitches."; -#endif - // Exclusive for Hardware Renderer - case IDC_PRELOAD_GS: - return "Uploads GS data when rendering a new frame to reproduce some effects accurately. Fixes black screen issues in games like Armored Core: Last Raven."; - case IDC_MIPMAP: - return "Enables mipmapping, which some games require to render correctly. 
Turn off only for debug purposes."; -#ifdef __linux__ - case IDC_FAST_TC_INV: - return "By default, the texture cache handles partial invalidations. Unfortunately it is very costly to compute CPU wise." - "\n\nThis hack replaces the partial invalidation with a complete deletion of the texture to reduce the CPU load.\n\nIt helps snowblind engine game."; -#endif - default: - if (updateText) - *updateText = false; - return ""; - } -} diff --git a/plugins/GSdx_legacy/GSSetting.h b/plugins/GSdx_legacy/GSSetting.h deleted file mode 100644 index ba47d92810..0000000000 --- a/plugins/GSdx_legacy/GSSetting.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2007-2015 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "stdafx.h" - -struct GSSetting -{ - int32_t value; - std::string name; - std::string note; - - template< typename T> - explicit GSSetting(T value, const char* name, const char* note) : - value(static_cast(value)), - name(name), - note(note) - { - } -}; - -const char* dialog_message(int ID, bool* updateText = NULL); - -#ifdef __linux__ -enum { - IDC_FILTER, - IDC_SKIPDRAWHACK, - IDC_SKIPDRAWHACKEDIT, - IDC_ALPHAHACK, - IDC_OFFSETHACK, - IDC_SPRITEHACK, - IDC_WILDHACK, - IDC_MSAACB, - IDC_ALPHASTENCIL, - IDC_CHECK_DISABLE_ALL_HACKS, - IDC_ALIGN_SPRITE, - IDC_ROUND_SPRITE, - IDC_TCOFFSETX, - IDC_TCOFFSETX2, - IDC_TCOFFSETY, - IDC_TCOFFSETY2, - IDC_PALTEX, - IDC_ACCURATE_BLEND_UNIT, - IDC_SAFE_FBMASK, - IDC_ACCURATE_DATE, - IDC_TC_DEPTH, - IDC_CRC_LEVEL, - IDC_AFCOMBO, - IDC_AA1, - IDC_SWTHREADS, - IDC_SWTHREADS_EDIT, - IDC_SHADEBOOST, - IDC_SHADER_FX, - IDC_FXAA, - IDC_MIPMAP, - IDC_PRELOAD_GS, - IDC_FAST_TC_INV, -}; -#endif diff --git a/plugins/GSdx_legacy/GSSettingsDlg.cpp b/plugins/GSdx_legacy/GSSettingsDlg.cpp deleted file mode 100644 index 4e0ac39229..0000000000 --- a/plugins/GSdx_legacy/GSSettingsDlg.cpp +++ /dev/null @@ -1,752 +0,0 @@ -/* - * Copyright (C) 2007-2015 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GSSettingsDlg.h" -#include "GSUtil.h" -#include "GSDevice9.h" -#include "GSDevice11.h" -#include "resource.h" -#include "GSSetting.h" - - -GSSettingsDlg::GSSettingsDlg() - : GSDialog(IDD_CONFIG) - -{ -#ifdef ENABLE_OPENCL - list ocldevs; - - GSUtil::GetDeviceDescs(ocldevs); - - int index = 0; - - for(auto dev : ocldevs) - { - m_ocl_devs.push_back(GSSetting(index++, dev.name.c_str(), "")); - } -#endif -} - -void GSSettingsDlg::OnInit() -{ - __super::OnInit(); - - CComPtr d3d9; - - d3d9.Attach(Direct3DCreate9(D3D_SDK_VERSION)); - - CComPtr dxgi_factory; - - if(GSUtil::CheckDXGI()) - { - CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&dxgi_factory); - } - adapters.clear(); - adapters.push_back(Adapter("Default Hardware Device", "default", GSUtil::CheckDirect3D11Level(NULL, D3D_DRIVER_TYPE_HARDWARE))); - adapters.push_back(Adapter("Reference Device", "ref", GSUtil::CheckDirect3D11Level(NULL, D3D_DRIVER_TYPE_REFERENCE))); - - if(dxgi_factory) - { - for(int i = 0;; i++) - { - CComPtr adapter; - - if(S_OK != dxgi_factory->EnumAdapters1(i, &adapter)) - break; - - DXGI_ADAPTER_DESC1 desc; - - HRESULT hr = adapter->GetDesc1(&desc); - - if(S_OK == hr) - { - D3D_FEATURE_LEVEL level = GSUtil::CheckDirect3D11Level(adapter, D3D_DRIVER_TYPE_UNKNOWN); - // GSDX isn't unicode!? 
-#if 1 - int size = WideCharToMultiByte(CP_ACP, 0, desc.Description, sizeof(desc.Description), NULL, 0, NULL, NULL); - char *buf = new char[size]; - WideCharToMultiByte(CP_ACP, 0, desc.Description, sizeof(desc.Description), buf, size, NULL, NULL); - adapters.push_back(Adapter(buf, GSAdapter(desc), level)); - delete[] buf; -#else - adapters.push_back(Adapter(desc.Description, GSAdapter(desc), level)); -#endif - } - } - } - else if(d3d9) - { - int n = d3d9->GetAdapterCount(); - for(int i = 0; i < n; i++) - { - D3DADAPTER_IDENTIFIER9 desc; - - if(D3D_OK != d3d9->GetAdapterIdentifier(i, 0, &desc)) - break; - - // GSDX isn't unicode!? -#if 0 - wchar_t buf[sizeof desc.Description * sizeof(WCHAR)]; - MultiByteToWideChar(CP_ACP /* I have no idea if this is right */, 0, desc.Description, sizeof(desc.Description), buf, sizeof buf / sizeof *buf); - adapters.push_back(Adapter(buf, GSAdapter(desc), (D3D_FEATURE_LEVEL)0)); -#else - adapters.push_back(Adapter(desc.Description, GSAdapter(desc), (D3D_FEATURE_LEVEL)0)); -#endif - } - } - - std::string adapter_setting = theApp.GetConfig("Adapter", "default"); - vector adapter_settings; - unsigned int adapter_sel = 0; - - for(unsigned int i = 0; i < adapters.size(); i++) - { - if(adapters[i].id == adapter_setting) - { - adapter_sel = i; - } - - adapter_settings.push_back(GSSetting(i, adapters[i].name.c_str(), "")); - } - - std::string ocldev = theApp.GetConfig("ocldev", ""); - - unsigned int ocl_sel = 0; - - for(unsigned int i = 0; i < m_ocl_devs.size(); i++) - { - if(ocldev == m_ocl_devs[i].name) - { - ocl_sel = i; - - break; - } - } - - ComboBoxInit(IDC_ADAPTER, adapter_settings, adapter_sel); - ComboBoxInit(IDC_OPENCL_DEVICE, m_ocl_devs, ocl_sel); - UpdateRenderers(); - - ComboBoxInit(IDC_INTERLACE, theApp.m_gs_interlace, theApp.GetConfig("Interlace", 7)); // 7 = "auto", detects interlace based on SMODE2 register - ComboBoxInit(IDC_UPSCALE_MULTIPLIER, theApp.m_gs_upscale_multiplier, theApp.GetConfig("upscale_multiplier", 1)); - 
ComboBoxInit(IDC_AFCOMBO, theApp.m_gs_max_anisotropy, theApp.GetConfig("MaxAnisotropy", 0)); - ComboBoxInit(IDC_FILTER, theApp.m_gs_filter, theApp.GetConfig("filter", 2)); - ComboBoxInit(IDC_ACCURATE_BLEND_UNIT, theApp.m_gs_acc_blend_level, theApp.GetConfig("accurate_blending_unit", 1)); - ComboBoxInit(IDC_CRC_LEVEL, theApp.m_gs_crc_level, theApp.GetConfig("crc_hack_level", 3)); - - CheckDlgButton(m_hWnd, IDC_PALTEX, theApp.GetConfig("paltex", 0)); - CheckDlgButton(m_hWnd, IDC_LOGZ, theApp.GetConfig("logz", 1)); - CheckDlgButton(m_hWnd, IDC_FBA, theApp.GetConfig("fba", 1)); - CheckDlgButton(m_hWnd, IDC_AA1, theApp.GetConfig("aa1", 0)); - CheckDlgButton(m_hWnd, IDC_MIPMAP, theApp.GetConfig("mipmap", 1)); - CheckDlgButton(m_hWnd, IDC_ACCURATE_DATE, theApp.GetConfig("accurate_date", 0)); - CheckDlgButton(m_hWnd, IDC_TC_DEPTH, theApp.GetConfig("texture_cache_depth", 0)); - - // Hacks - CheckDlgButton(m_hWnd, IDC_HACKS_ENABLED, theApp.GetConfig("UserHacks", 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_RESX), UDM_SETRANGE, 0, MAKELPARAM(8192, 256)); - SendMessage(GetDlgItem(m_hWnd, IDC_RESX), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("resx", 1024), 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETRANGE, 0, MAKELPARAM(8192, 256)); - SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("resy", 1024), 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("extrathreads", DEFAULT_EXTRA_RENDERING_THREADS), 0)); - - AddTooltip(IDC_FILTER); - AddTooltip(IDC_CRC_LEVEL); - AddTooltip(IDC_PALTEX); - AddTooltip(IDC_ACCURATE_DATE); - AddTooltip(IDC_ACCURATE_BLEND_UNIT); - AddTooltip(IDC_TC_DEPTH); - AddTooltip(IDC_AFCOMBO); - AddTooltip(IDC_AA1); - AddTooltip(IDC_MIPMAP); - AddTooltip(IDC_SWTHREADS); - AddTooltip(IDC_SWTHREADS_EDIT); - AddTooltip(IDC_FBA); - AddTooltip(IDC_LOGZ); - - UpdateControls(); -} - -bool 
GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) -{ - switch (id) - { - case IDC_ADAPTER: - if (code == CBN_SELCHANGE) - { - UpdateRenderers(); - UpdateControls(); - } - break; - case IDC_RENDERER: - case IDC_UPSCALE_MULTIPLIER: - case IDC_FILTER: - if (code == CBN_SELCHANGE) - UpdateControls(); - break; - case IDC_PALTEX: - case IDC_HACKS_ENABLED: - if (code == BN_CLICKED) - UpdateControls(); - break; - case IDC_SHADEBUTTON: - if (code == BN_CLICKED) - ShaderDlg.DoModal(); - break; - case IDC_HACKSBUTTON: - if (code == BN_CLICKED) - HacksDlg.DoModal(); - break; - case IDOK: - { - INT_PTR data; - - if(ComboBoxGetSelData(IDC_ADAPTER, data)) - { - theApp.SetConfig("Adapter", adapters[(int)data].id.c_str()); - } - - if(ComboBoxGetSelData(IDC_OPENCL_DEVICE, data)) - { - if ((int)data < m_ocl_devs.size()) { - theApp.SetConfig("ocldev", m_ocl_devs[(int)data].name.c_str()); - } - } - - if(ComboBoxGetSelData(IDC_RENDERER, data)) - { - theApp.SetConfig("Renderer", (int)data); - } - - if(ComboBoxGetSelData(IDC_INTERLACE, data)) - { - theApp.SetConfig("Interlace", (int)data); - } - - if(ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, data)) - { - theApp.SetConfig("upscale_multiplier", (int)data); - } - else - { - theApp.SetConfig("upscale_multiplier", 1); - } - - if (ComboBoxGetSelData(IDC_FILTER, data)) - { - theApp.SetConfig("filter", (int)data); - } - - if(ComboBoxGetSelData(IDC_ACCURATE_BLEND_UNIT, data)) - { - theApp.SetConfig("accurate_blending_unit", (int)data); - } - - if (ComboBoxGetSelData(IDC_CRC_LEVEL, data)) - { - theApp.SetConfig("crc_hack_level", (int)data); - } - - if(ComboBoxGetSelData(IDC_AFCOMBO, data)) - { - theApp.SetConfig("MaxAnisotropy", (int)data); - } - - theApp.SetConfig("paltex", (int)IsDlgButtonChecked(m_hWnd, IDC_PALTEX)); - theApp.SetConfig("logz", (int)IsDlgButtonChecked(m_hWnd, IDC_LOGZ)); - theApp.SetConfig("fba", (int)IsDlgButtonChecked(m_hWnd, IDC_FBA)); - theApp.SetConfig("aa1", (int)IsDlgButtonChecked(m_hWnd, IDC_AA1)); - 
theApp.SetConfig("mipmap", (int)IsDlgButtonChecked(m_hWnd, IDC_MIPMAP)); - theApp.SetConfig("resx", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESX), UDM_GETPOS, 0, 0)); - theApp.SetConfig("resy", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_GETPOS, 0, 0)); - theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); - theApp.SetConfig("accurate_date", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_DATE)); - theApp.SetConfig("texture_cache_depth", (int)IsDlgButtonChecked(m_hWnd, IDC_TC_DEPTH)); - theApp.SetConfig("UserHacks", (int)IsDlgButtonChecked(m_hWnd, IDC_HACKS_ENABLED)); - } - break; - } - - return __super::OnCommand(hWnd, id, code); -} - -void GSSettingsDlg::UpdateRenderers() -{ - INT_PTR i; - - if (!ComboBoxGetSelData(IDC_ADAPTER, i)) - return; - - // Ugggh - HacksDlg.SetAdapter(adapters[(int)i].id); - - D3D_FEATURE_LEVEL level = adapters[(int)i].level; - - vector renderers; - - GSRendererType renderer_setting; - - if (ComboBoxGetSelData(IDC_RENDERER, i)) - renderer_setting = static_cast(i); - else - renderer_setting = static_cast(theApp.GetConfig("Renderer", static_cast(GSRendererType::Default))); - - GSRendererType renderer_sel = GSRendererType::Default; - - for(size_t i = 0; i < theApp.m_gs_renderers.size(); i++) - { - GSSetting r = theApp.m_gs_renderers[i]; - - GSRendererType renderer = static_cast(r.value); - - if(renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::DX1011_SW || renderer == GSRendererType::DX1011_Null || renderer == GSRendererType::DX1011_OpenCL) - { - if(level < D3D_FEATURE_LEVEL_10_0) continue; -#if 0 - // This code is disabled so the renderer name doesn't get messed with. - // Just call it Direct3D11. - r.name += (level >= D3D_FEATURE_LEVEL_11_0 ? 
"11" : "10"); -#endif - } - - renderers.push_back(r); - - if (static_cast(r.value) == renderer_setting) - { - renderer_sel = renderer_setting; - } - } - - ComboBoxInit(IDC_RENDERER, renderers, static_cast(renderer_sel)); -} - -void GSSettingsDlg::UpdateControls() -{ - INT_PTR i; - - int integer_scaling = 0; // in case reading the combo doesn't work, enable the custom res control anyway - - if(ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, i)) - { - integer_scaling = (int)i; - } - - if(ComboBoxGetSelData(IDC_RENDERER, i)) - { - GSRendererType renderer = static_cast(i); - - bool dx9 = renderer == GSRendererType::DX9_HW || renderer == GSRendererType::DX9_SW || renderer == GSRendererType::DX9_Null || renderer == GSRendererType::DX9_OpenCL; - bool dx11 = renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::DX1011_SW || renderer == GSRendererType::DX1011_Null || renderer == GSRendererType::DX1011_OpenCL; - bool ogl = renderer == GSRendererType::OGL_HW || renderer == GSRendererType::OGL_SW || renderer == GSRendererType::OGL_OpenCL; - - bool hw = renderer == GSRendererType::DX9_HW || renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::OGL_HW || renderer == GSRendererType::Null_HW; - bool sw = renderer == GSRendererType::DX9_SW || renderer == GSRendererType::DX1011_SW || renderer == GSRendererType::OGL_SW || renderer == GSRendererType::Null_SW; - bool ocl = renderer == GSRendererType::DX9_OpenCL || renderer == GSRendererType::DX1011_OpenCL || renderer == GSRendererType::Null_OpenCL || renderer == GSRendererType::OGL_OpenCL; - - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO9), dx9 ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGO11), dx11 ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGOGL), ogl ? SW_SHOW : SW_HIDE); -#ifndef ENABLE_OPENCL - ShowWindow(GetDlgItem(m_hWnd, IDC_OPENCL_DEVICE), SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_OPENCL_TEXT), SW_HIDE); -#endif - - ShowWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9? 
SW_SHOW: SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 ? SW_SHOW : SW_HIDE); - - ShowWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_DATE), ogl ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND_UNIT), ogl ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND_UNIT_TEXT), ogl ? SW_SHOW : SW_HIDE); - ShowWindow(GetDlgItem(m_hWnd, IDC_TC_DEPTH), ogl ? SW_SHOW : SW_HIDE); - - EnableWindow(GetDlgItem(m_hWnd, IDC_CRC_LEVEL), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_CRC_LEVEL_TEXT), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_OPENCL_DEVICE), ocl); - EnableWindow(GetDlgItem(m_hWnd, IDC_RESX), hw && !integer_scaling); - EnableWindow(GetDlgItem(m_hWnd, IDC_RESX_EDIT), hw && !integer_scaling); - EnableWindow(GetDlgItem(m_hWnd, IDC_RESY), hw && !integer_scaling); - EnableWindow(GetDlgItem(m_hWnd, IDC_RESY_EDIT), hw && !integer_scaling); - EnableWindow(GetDlgItem(m_hWnd, IDC_CUSTOM_TEXT), hw && !integer_scaling); - EnableWindow(GetDlgItem(m_hWnd, IDC_UPSCALE_MULTIPLIER), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_UPSCALE_MULTIPLIER_TEXT), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_FILTER), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw); - - INT_PTR filter; - if (ComboBoxGetSelData(IDC_FILTER, filter)) - { - EnableWindow(GetDlgItem(m_hWnd, IDC_AFCOMBO), hw && filter && !IsDlgButtonChecked(m_hWnd, IDC_PALTEX)); - } - EnableWindow(GetDlgItem(m_hWnd, IDC_AFCOMBO_TEXT), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_FILTER_TEXT), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_DATE), ogl && hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND_UNIT), ogl && hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND_UNIT_TEXT), ogl && hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_TC_DEPTH), ogl && hw); - - // Software mode settings - EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); - EnableWindow(GetDlgItem(m_hWnd, 
IDC_MIPMAP), sw); - EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_TEXT), sw); - EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw); - EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw); - - // Hacks - EnableWindow(GetDlgItem(m_hWnd, IDC_HACKS_ENABLED), hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_HACKSBUTTON), hw && IsDlgButtonChecked(m_hWnd, IDC_HACKS_ENABLED)); - } - -} - -// Shader Configuration Dialog - -GSShaderDlg::GSShaderDlg() : - GSDialog(IDD_SHADER) -{} - -void GSShaderDlg::OnInit() -{ - //TV Shader - ComboBoxInit(IDC_TVSHADER, theApp.m_gs_tv_shaders, theApp.GetConfig("TVshader", 0)); - - //Shade Boost - CheckDlgButton(m_hWnd, IDC_SHADEBOOST, theApp.GetConfig("ShadeBoost", 0)); - contrast = theApp.GetConfig("ShadeBoost_Contrast", 50); - brightness = theApp.GetConfig("ShadeBoost_Brightness", 50); - saturation = theApp.GetConfig("ShadeBoost_Saturation", 50); - - // External FX shader - CheckDlgButton(m_hWnd, IDC_SHADER_FX, theApp.GetConfig("shaderfx", 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_SETTEXT, 0, (LPARAM)theApp.GetConfig("shaderfx_glsl", "shaders\\GSdx.fx").c_str()); - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_SETTEXT, 0, (LPARAM)theApp.GetConfig("shaderfx_conf", "shaders\\GSdx_FX_Settings.ini").c_str()); - - // FXAA shader - CheckDlgButton(m_hWnd, IDC_FXAA, theApp.GetConfig("Fxaa", 0)); - - AddTooltip(IDC_SHADEBOOST); - AddTooltip(IDC_SHADER_FX); - AddTooltip(IDC_FXAA); - - UpdateControls(); -} - -void GSShaderDlg::UpdateControls() -{ - SendMessage(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER), TBM_SETRANGE, TRUE, MAKELONG(0, 100)); - SendMessage(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER), TBM_SETRANGE, TRUE, MAKELONG(0, 100)); - SendMessage(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER), TBM_SETRANGE, TRUE, MAKELONG(0, 100)); - - SendMessage(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER), TBM_SETPOS, TRUE, saturation); - SendMessage(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER), TBM_SETPOS, TRUE, brightness); - 
SendMessage(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER), TBM_SETPOS, TRUE, contrast); - - char text[8] = {0}; - - sprintf(text, "%d", saturation); - SetDlgItemText(m_hWnd, IDC_SATURATION_TEXT, text); - sprintf(text, "%d", brightness); - SetDlgItemText(m_hWnd, IDC_BRIGHTNESS_TEXT, text); - sprintf(text, "%d", contrast); - SetDlgItemText(m_hWnd, IDC_CONTRAST_TEXT, text); - - // Shader Settings - bool external_shader_selected = IsDlgButtonChecked(m_hWnd, IDC_SHADER_FX) == BST_CHECKED; - bool shadeboost_selected = IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST) == BST_CHECKED; - EnableWindow(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SATURATION_TEXT), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_TEXT), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_CONTRAST_TEXT), shadeboost_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_TEXT), external_shader_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), external_shader_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_BUTTON), external_shader_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_TEXT), external_shader_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), external_shader_selected); - EnableWindow(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_BUTTON), external_shader_selected); -} - -bool GSShaderDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) -{ - switch(message) - { - case WM_HSCROLL: - { - if((HWND)lParam == GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER)) - { - char text[8] = {0}; - - saturation = SendMessage(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER),TBM_GETPOS,0,0); - - sprintf(text, "%d", saturation); - SetDlgItemText(m_hWnd, IDC_SATURATION_TEXT, text); - } - else if((HWND)lParam == GetDlgItem(m_hWnd, 
IDC_BRIGHTNESS_SLIDER)) - { - char text[8] = {0}; - - brightness = SendMessage(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER),TBM_GETPOS,0,0); - - sprintf(text, "%d", brightness); - SetDlgItemText(m_hWnd, IDC_BRIGHTNESS_TEXT, text); - } - else if((HWND)lParam == GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER)) - { - char text[8] = {0}; - - contrast = SendMessage(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER),TBM_GETPOS,0,0); - - sprintf(text, "%d", contrast); - SetDlgItemText(m_hWnd, IDC_CONTRAST_TEXT, text); - } - } break; - - case WM_COMMAND: - { - int id = LOWORD(wParam); - - switch(id) - { - case IDOK: - { - INT_PTR data; - //TV Shader - if (ComboBoxGetSelData(IDC_TVSHADER, data)) - { - theApp.SetConfig("TVshader", (int)data); - } - // Shade Boost - theApp.SetConfig("ShadeBoost", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST)); - theApp.SetConfig("ShadeBoost_Contrast", contrast); - theApp.SetConfig("ShadeBoost_Brightness", brightness); - theApp.SetConfig("ShadeBoost_Saturation", saturation); - - // FXAA shader - theApp.SetConfig("Fxaa", (int)IsDlgButtonChecked(m_hWnd, IDC_FXAA)); - - // External FX Shader - theApp.SetConfig("shaderfx", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADER_FX)); - - // External FX Shader(OpenGL) - int shader_fx_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXTLENGTH, 0, 0); - int shader_fx_conf_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXTLENGTH, 0, 0); - int length = std::max(shader_fx_length, shader_fx_conf_length) + 1; - char *buffer = new char[length]; - - - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer); - theApp.SetConfig("shaderfx_glsl", buffer); // Not really glsl only ;) - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer); - theApp.SetConfig("shaderfx_conf", buffer); - delete[] buffer; - - EndDialog(m_hWnd, id); - } break; - case IDC_SHADEBOOST: - UpdateControls(); - case IDC_SHADER_FX: - 
if (HIWORD(wParam) == BN_CLICKED) - UpdateControls(); - break; - case IDC_SHADER_FX_BUTTON: - if (HIWORD(wParam) == BN_CLICKED) - OpenFileDialog(IDC_SHADER_FX_EDIT, "Select External Shader"); - break; - - case IDC_SHADER_FX_CONF_BUTTON: - if (HIWORD(wParam) == BN_CLICKED) - OpenFileDialog(IDC_SHADER_FX_CONF_EDIT, "Select External Shader Config"); - break; - - case IDCANCEL: - { - EndDialog(m_hWnd, IDCANCEL); - } break; - } - - } break; - - case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; - - default: return false; - } - - - return true; -} - -// Hacks Dialog - -GSHacksDlg::GSHacksDlg() : - GSDialog(IDD_HACKS) -{ - memset(msaa2cb, 0, sizeof(msaa2cb)); - memset(cb2msaa, 0, sizeof(cb2msaa)); -} - -void GSHacksDlg::OnInit() -{ - HWND hwnd_renderer = GetDlgItem(GetParent(m_hWnd), IDC_RENDERER); - GSRendererType renderer = static_cast(SendMessage(hwnd_renderer, CB_GETITEMDATA, SendMessage(hwnd_renderer, CB_GETCURSEL, 0, 0), 0)); - // It can only be accessed with a HW renderer, so this is sufficient. - bool dx9 = renderer == GSRendererType::DX9_HW; - // bool dx11 = renderer == GSRendererType::DX1011_HW; - bool ogl = renderer == GSRendererType::OGL_HW; - unsigned short cb = 0; - - if(dx9) for(unsigned short i = 0; i < 17; i++) - { - if( i == 1) continue; - - int depth = GSDevice9::GetMaxDepth(i, adapter_id); - - if(depth) - { - char text[32] = {0}; - sprintf(text, depth == 32 ? "%dx Z-32" : "%dx Z-24", i); - SendMessage(GetDlgItem(m_hWnd, IDC_MSAACB), CB_ADDSTRING, 0, (LPARAM)text); - - msaa2cb[i] = cb; - cb2msaa[cb] = i; - cb++; - } - } - else for(unsigned short j = 0; j < 5; j++) // TODO: Make the same kind of check for d3d11, eventually.... - { - unsigned short i = j == 0 ? 
0 : 1 << j; - - msaa2cb[i] = j; - cb2msaa[j] = i; - - char text[32] = {0}; - sprintf(text, "%dx ", i); - - SendMessage(GetDlgItem(m_hWnd, IDC_MSAACB), CB_ADDSTRING, 0, (LPARAM)text); - } - - SendMessage(GetDlgItem(m_hWnd, IDC_MSAACB), CB_SETCURSEL, msaa2cb[min(theApp.GetConfig("UserHacks_MSAA", 0), 16)], 0); - - CheckDlgButton(m_hWnd, IDC_ALPHAHACK, theApp.GetConfig("UserHacks_AlphaHack", 0)); - CheckDlgButton(m_hWnd, IDC_OFFSETHACK, theApp.GetConfig("UserHacks_HalfPixelOffset", 0)); - CheckDlgButton(m_hWnd, IDC_WILDHACK, theApp.GetConfig("UserHacks_WildHack", 0)); - CheckDlgButton(m_hWnd, IDC_ALPHASTENCIL, theApp.GetConfig("UserHacks_AlphaStencil", 0)); - CheckDlgButton(m_hWnd, IDC_PRELOAD_GS, theApp.GetConfig("preload_frame_with_gs_data", 0)); - CheckDlgButton(m_hWnd, IDC_ALIGN_SPRITE, theApp.GetConfig("UserHacks_align_sprite_X", 0)); - CheckDlgButton(m_hWnd, IDC_SAFE_FBMASK, theApp.GetConfig("UserHacks_safe_fbmask", 0)); - - - ComboBoxInit(IDC_ROUND_SPRITE, theApp.m_gs_hack, theApp.GetConfig("UserHacks_round_sprite_offset", 0)); - ComboBoxInit(IDC_SPRITEHACK, theApp.m_gs_hack, theApp.GetConfig("UserHacks_SpriteHack", 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWHACK), UDM_SETRANGE, 0, MAKELPARAM(1000, 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWHACK), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("UserHacks_SkipDraw", 0), 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETX), UDM_SETRANGE, 0, MAKELPARAM(10000, 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETX), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("UserHacks_TCOffset", 0) & 0xFFFF, 0)); - - SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_SETRANGE, 0, MAKELPARAM(10000, 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_SETPOS, 0, MAKELPARAM((theApp.GetConfig("UserHacks_TCOffset", 0) >> 16) & 0xFFFF, 0)); - - ShowWindow(GetDlgItem(m_hWnd, IDC_ALPHASTENCIL), ogl ? SW_HIDE : SW_SHOW); - ShowWindow(GetDlgItem(m_hWnd, IDC_ALPHAHACK), ogl ? 
SW_HIDE : SW_SHOW); - ShowWindow(GetDlgItem(m_hWnd, IDC_SAFE_FBMASK), ogl ? SW_SHOW : SW_HIDE); - EnableWindow(GetDlgItem(m_hWnd, IDC_MSAACB), !ogl); - EnableWindow(GetDlgItem(m_hWnd, IDC_MSAA_TEXT), !ogl); - - AddTooltip(IDC_SKIPDRAWHACKEDIT); - AddTooltip(IDC_SKIPDRAWHACK); - AddTooltip(IDC_ALPHAHACK); - AddTooltip(IDC_OFFSETHACK); - AddTooltip(IDC_SPRITEHACK); - AddTooltip(IDC_WILDHACK); - AddTooltip(IDC_MSAACB); - AddTooltip(IDC_ALPHASTENCIL); - AddTooltip(IDC_ALIGN_SPRITE); - AddTooltip(IDC_ROUND_SPRITE); - AddTooltip(IDC_TCOFFSETX); - AddTooltip(IDC_TCOFFSETX2); - AddTooltip(IDC_TCOFFSETY); - AddTooltip(IDC_TCOFFSETY2); - AddTooltip(IDC_PRELOAD_GS); - AddTooltip(IDC_SAFE_FBMASK); -} - -void GSHacksDlg::UpdateControls() -{} - -bool GSHacksDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) -{ - switch(message) - { - case WM_COMMAND: - { - int id = LOWORD(wParam); - - switch(id) - { - case IDOK: - { - INT_PTR data; - if (ComboBoxGetSelData(IDC_ROUND_SPRITE, data)) - { - theApp.SetConfig("UserHacks_round_sprite_offset", (int)data); - } - if (ComboBoxGetSelData(IDC_SPRITEHACK, data)) - { - theApp.SetConfig("UserHacks_SpriteHack", (int)data); - } - theApp.SetConfig("UserHacks_MSAA", cb2msaa[(int)SendMessage(GetDlgItem(m_hWnd, IDC_MSAACB), CB_GETCURSEL, 0, 0)]); - theApp.SetConfig("UserHacks_AlphaHack", (int)IsDlgButtonChecked(m_hWnd, IDC_ALPHAHACK)); - theApp.SetConfig("UserHacks_HalfPixelOffset", (int)IsDlgButtonChecked(m_hWnd, IDC_OFFSETHACK)); - theApp.SetConfig("UserHacks_SkipDraw", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWHACK), UDM_GETPOS, 0, 0)); - theApp.SetConfig("UserHacks_WildHack", (int)IsDlgButtonChecked(m_hWnd, IDC_WILDHACK)); - theApp.SetConfig("UserHacks_AlphaStencil", (int)IsDlgButtonChecked(m_hWnd, IDC_ALPHASTENCIL)); - theApp.SetConfig("preload_frame_with_gs_data", (int)IsDlgButtonChecked(m_hWnd, IDC_PRELOAD_GS)); - theApp.SetConfig("Userhacks_align_sprite_X", (int)IsDlgButtonChecked(m_hWnd, IDC_ALIGN_SPRITE)); - 
theApp.SetConfig("UserHacks_safe_fbmask", (int)IsDlgButtonChecked(m_hWnd, IDC_SAFE_FBMASK)); - - - unsigned int TCOFFSET = SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETX), UDM_GETPOS, 0, 0) & 0xFFFF; - TCOFFSET |= (SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_GETPOS, 0, 0) & 0xFFFF) << 16; - - theApp.SetConfig("UserHacks_TCOffset", TCOFFSET); - - EndDialog(m_hWnd, id); - } break; - } - - } break; - - case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; - - default: return false; - } - - return true; -} diff --git a/plugins/GSdx_legacy/GSSettingsDlg.h b/plugins/GSdx_legacy/GSSettingsDlg.h deleted file mode 100644 index 8bfcf3f373..0000000000 --- a/plugins/GSdx_legacy/GSSettingsDlg.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDialog.h" -#include "GSSetting.h" - -class GSShaderDlg : public GSDialog -{ - int saturation; - int brightness; - int contrast; - - void UpdateControls(); - -protected: - void OnInit(); - bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); - -public: - GSShaderDlg(); -}; - -class GSHacksDlg : public GSDialog -{ - unsigned short cb2msaa[17]; - unsigned short msaa2cb[17]; - std::string adapter_id; - - bool isdx9; - - void UpdateControls(); - -protected: - void OnInit(); - bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); - -public: - GSHacksDlg(); - - // Ugh - void SetAdapter(std::string adapter_id_) - { - adapter_id = adapter_id_; - } -}; - -class GSSettingsDlg : public GSDialog -{ - - struct Adapter - { - std::string name; - std::string id; - D3D_FEATURE_LEVEL level; - Adapter(const std::string &n, const std::string &i, const D3D_FEATURE_LEVEL &l) : name(n), id(i), level(l) {} - }; - - std::vector adapters; - - vector m_ocl_devs; - uint32 m_lastValidMsaa; // used to revert to previous dialog value if the user changed to invalid one, or lesser one and canceled - - void UpdateRenderers(); - void UpdateControls(); - -protected: - void OnInit(); - bool OnCommand(HWND hWnd, UINT id, UINT code); - - // Shade Boost - GSShaderDlg ShaderDlg; - GSHacksDlg HacksDlg; - -public: - GSSettingsDlg(); -}; diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.cpp deleted file mode 100644 index 37e253ee9f..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" - -#if _M_SSE >= 0x501 - -const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] = -{ - GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f), - GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f), - GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f), - GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f), - GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f), - GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f), - GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f), - GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f), - GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f), -}; - -#else - -const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] = -{ - GSVector4(4.0f, 4.0f, 4.0f, 4.0f), - GSVector4(0.0f, 1.0f, 2.0f, 3.0f), - GSVector4(-1.0f, 0.0f, 1.0f, 2.0f), - GSVector4(-2.0f, -1.0f, 0.0f, 1.0f), - GSVector4(-3.0f, -2.0f, -1.0f, 0.0f), -}; - -#endif - -GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) - , m_local(*(GSScanlineLocalData*)param) -{ - m_sel.key = key; - - m_en.z = m_sel.zb ? 1 : 0; - m_en.f = m_sel.fb && m_sel.fge ? 1 : 0; - m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; - m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 
1 : 0; - - Generate(); -} diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.h b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.h deleted file mode 100644 index 746d7996aa..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSScanlineEnvironment.h" -#include "GSFunctionMap.h" - -class GSSetupPrimCodeGenerator : public GSCodeGenerator -{ - void operator = (const GSSetupPrimCodeGenerator&); - - GSScanlineSelector m_sel; - GSScanlineLocalData& m_local; - - struct {uint32 z:1, f:1, t:1, c:1;} m_en; - - void Generate(); - - void Depth(); - void Texture(); - void Color(); - -public: - GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); - - #if _M_SSE >= 0x501 - static const GSVector8 m_shift[9]; - #else - static const GSVector4 m_shift[5]; - #endif -}; diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.avx.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.avx.cpp deleted file mode 100644 index 5fe710dad3..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.avx.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE == 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -void GSSetupPrimCodeGenerator::Generate() -{ - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); - - mov(r8, (size_t)&m_local); - - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(rax, (size_t)&m_shift[0]); - - for(int i = 0; i < 5; i++) - { - vmovaps(Xmm(3 + i), ptr[rax + i * 16]); - } - } - - Depth(); - - Texture(); - - Color(); - - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth() -{ - if(!m_en.z && !m_en.f) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - vmovaps(xmm0, ptr[rdx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // GSVector4 df = p.wwww(); - - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - vmulps(xmm2, xmm1, xmm3); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.f)], xmm2); - - for(int i = 0; i < 4; i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - vmulps(xmm2, xmm1, Xmm(4 + i)); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(ptr[r8 + variableOffset], xmm2); - } - } - - if(m_en.z) - { - // GSVector4 dz = p.zzzz(); - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - vmulps(xmm1, xmm0, xmm3); - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.z)], xmm1); - - 
for(int i = 0; i < 4; i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - vmulps(xmm1, xmm0, Xmm(4 + i)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(ptr[r8 + variableOffset], xmm1); - } - } - } - else - { - // GSVector4 p = vertices[0].p; - - vmovaps(xmm0, ptr[rcx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - vcvttps2dq(xmm1, xmm0); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, p.f)], xmm1); - } - - if(m_en.z) - { - // GSVector4 z = p.zzzz(); - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - if(m_sel.zoverflow) - { - // m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - - mov(r9, (size_t)&GSVector4::m_half); - - vbroadcastss(xmm1, ptr[r9]); - vmulps(xmm1, xmm0); - vcvttps2dq(xmm1, xmm1); - vpslld(xmm1, 1); - - vcvttps2dq(xmm0, xmm0); - vpcmpeqd(xmm2, xmm2); - vpsrld(xmm2, 31); - vpand(xmm0, xmm2); - - vpor(xmm0, xmm1); - } - else - { - // m_local.p.z = GSVector4i(z); - - vcvttps2dq(xmm0, xmm0); - } - - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, p.z)], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture() -{ - if(!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - vmovaps(xmm0, ptr[rdx + offsetof(GSVertexSW, t)]); - - vmulps(xmm1, xmm0, xmm3); - - if(m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - vcvttps2dq(xmm1, xmm1); - - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - - for(int j = 0, k = m_sel.fst ? 
2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for(int i = 0; i < 4; i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - vmulps(xmm2, xmm1, Xmm(4 + i)); - - if(m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - vcvttps2dq(xmm2, xmm2); - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch(j) - { - case 0: vmovdqa(ptr[r8 + variableOffsetS], xmm2); break; - case 1: vmovdqa(ptr[r8 + variableOffsetT], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch(j) - { - case 0: vmovaps(ptr[r8 + variableOffsetS], xmm2); break; - case 1: vmovaps(ptr[r8 + variableOffsetT], xmm2); break; - case 2: vmovaps(ptr[r8 + variableOffsetQ], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color() -{ - if(!m_en.c) - { - return; - } - - if(m_sel.iip) - { - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[rdx + offsetof(GSVertexSW, c)]); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - vmulps(xmm1, xmm0, xmm3); - vcvttps2dq(xmm1, xmm1); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(xmm1, xmm1); - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.c)], xmm1); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for(int i 
= 0; i < 4; i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(xmm0, xmm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(ptr[r8 + variableOffset], xmm0); - } - - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[rdx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for(int i = 0; i < 4; i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(xmm0, xmm1); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0])); - vmovdqa(ptr[r8 + variableOffset], xmm0); - } - } - else - { - // GSVector4i c = GSVector4i(vertices[0].c); - - vcvttps2dq(xmm0, ptr[rcx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if(m_sel.tfx == TFX_NONE) - { - vpsrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, c.rb)], xmm1); - vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, c.ga)], xmm2); - } -} - 
-#endif diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.cpp deleted file mode 100644 index 6456ead387..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x64.cpp +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -void GSSetupPrimCodeGenerator::Generate() -{ - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); - - mov(r8, (size_t)&m_local); - - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - for(int i = 0; i < 5; i++) - { - movaps(Xmm(3 + i), ptr[rax + i * 16]); - } - } - - Depth(); - - Texture(); - - Color(); - - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth() -{ - if(!m_en.z && !m_en.f) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - movaps(xmm0, ptr[rdx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // GSVector4 df = p.wwww(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.f)], xmm2); - - for(int i = 0; i < 4; i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[r8 + variableOffset], xmm2); - } - } - - if(m_en.z) - { - // GSVector4 dz = p.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.z)], 
xmm1); - - for(int i = 0; i < 4; i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - movaps(xmm1, xmm0); - mulps(xmm1, Xmm(4 + i)); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[r8 + variableOffset], xmm1); - } - } - } - else - { - // GSVector4 p = vertices[0].p; - - movaps(xmm0, ptr[rcx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - cvttps2dq(xmm1, xmm0); - pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, p.f)], xmm1); - } - - if(m_en.z) - { - // GSVector4 z = p.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - if(m_sel.zoverflow) - { - // m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); - - mov(r9, (size_t)&GSVector4::m_half); - - movss(xmm1, ptr[r9]); - shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); - mulps(xmm1, xmm0); - cvttps2dq(xmm1, xmm1); - pslld(xmm1, 1); - - cvttps2dq(xmm0, xmm0); - pcmpeqd(xmm2, xmm2); - psrld(xmm2, 31); - pand(xmm0, xmm2); - - por(xmm0, xmm1); - } - else - { - // m_local.p.z = GSVector4i(z); - - cvttps2dq(xmm0, xmm0); - } - - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, p.z)], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture() -{ - if(!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - movaps(xmm0, ptr[rdx + offsetof(GSVertexSW, t)]); - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - - if(m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - cvttps2dq(xmm1, xmm1); - - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); - } - - for(int j = 0, k = m_sel.fst ? 
2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for(int i = 0; i < 4; i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - - if(m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - cvttps2dq(xmm2, xmm2); - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch(j) - { - case 0: movdqa(ptr[r8 + variableOffsetS], xmm2); break; - case 1: movdqa(ptr[r8 + variableOffsetT], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - - switch(j) - { - case 0: movaps(ptr[r8 + variableOffsetS], xmm2); break; - case 1: movaps(ptr[r8 + variableOffsetT], xmm2); break; - case 2: movaps(ptr[r8 + variableOffsetQ], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color() -{ - if(!m_en.c) - { - return; - } - - if(m_sel.iip) - { - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[rdx + offsetof(GSVertexSW, c)]); - movaps(xmm1, xmm0); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0)); - packssdw(xmm2, xmm2); - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.c)], xmm2); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm1, 
xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - - for(int i = 0; i < 4; i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].rb = r.upl16(b); - - punpcklwd(xmm2, xmm3); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[r8 + variableOffset], xmm2); - } - - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[rdx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - movaps(xmm1, xmm0); - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - for(int i = 0; i < 4; i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].ga = g.upl16(a); - - punpcklwd(xmm2, xmm3); - - const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0])); - movdqa(ptr[r8 + variableOffset], xmm2); - } - } - else - { - // GSVector4i c = GSVector4i(vertices[0].c); - - cvttps2dq(xmm0, ptr[rcx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - punpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if(m_sel.tfx == TFX_NONE) - { - psrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - movdqa(ptr[r8 + 
offsetof(GSScanlineLocalData, c.rb)], xmm1); - movdqa(ptr[r8 + offsetof(GSScanlineLocalData, c.ga)], xmm2); - } -} - -#endif diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx.cpp deleted file mode 100644 index 21a7d47c97..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx.cpp +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE == 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate() -{ - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for(int i = 0; i < (m_sel.notest ? 
2 : 5); i++) - { - vmovaps(Xmm(3 + i), ptr[&m_shift[i]]); - } - } - - Depth(); - - Texture(); - - Color(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth() -{ - if(!m_en.z && !m_en.f) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // GSVector4 df = p.wwww(); - - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - vmulps(xmm2, xmm1, xmm3); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d4.f], xmm2); - - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - vmulps(xmm2, xmm1, Xmm(4 + i)); - vcvttps2dq(xmm2, xmm2); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d[i].f], xmm2); - } - } - - if(m_en.z) - { - // GSVector4 dz = p.zzzz(); - - vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - vmulps(xmm1, xmm0, xmm3); - vmovdqa(ptr[&m_local.d4.z], xmm1); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - vmulps(xmm1, xmm0, Xmm(4 + i)); - vmovdqa(ptr[&m_local.d[i].z], xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - vcvttps2dq(xmm1, xmm0); - vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - vmovdqa(ptr[&m_local.p.f], xmm1); - } - - if(m_en.z) - { - // uint32 z is bypassed in t.w - - vmovdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]); - vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - vmovdqa(ptr[&m_local.p.z], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture() -{ - if(!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]); - - vmulps(xmm1, xmm0, xmm3); - - if(m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - vcvttps2dq(xmm1, xmm1); - - vmovdqa(ptr[&m_local.d4.stq], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - vmovaps(ptr[&m_local.d4.stq], xmm1); - } - - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - vmulps(xmm2, xmm1, Xmm(4 + i)); - - if(m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - vcvttps2dq(xmm2, xmm2); - - switch(j) - { - case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch(j) - { - case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color() -{ - if(!m_en.c) - { - return; - } - - if(m_sel.iip) - { - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - vmulps(xmm1, xmm0, xmm3); - vcvttps2dq(xmm1, xmm1); - vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(xmm1, xmm1); - vmovdqa(ptr[&m_local.d4.c], xmm1); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(xmm0, xmm1); - vmovdqa(ptr[&m_local.d[i].rb], xmm0); - } - - // GSVector4 c = dscan.c; - - vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - vmulps(xmm0, xmm2, Xmm(4 + i)); - vcvttps2dq(xmm0, xmm0); - vpackssdw(xmm0, xmm0); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - vmulps(xmm1, xmm3, Xmm(4 + i)); - vcvttps2dq(xmm1, xmm1); - vpackssdw(xmm1, xmm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(xmm0, xmm1); - vmovdqa(ptr[&m_local.d[i].ga], xmm0); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch(m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if(m_sel.tfx == TFX_NONE) - { - vpsrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(ptr[&m_local.c.rb], xmm1); - vmovdqa(ptr[&m_local.c.ga], xmm2); - } -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx2.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx2.cpp deleted file mode 100644 index 172d053a5a..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.avx2.cpp +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License 
as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE >= 0x501 && !(defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate() -{ - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for(int i = 0; i < (m_sel.notest ? 
2 : 5); i++) - { - vmovaps(Ymm(3 + i), ptr[&m_shift[i]]); - } - } - - Depth(); - - Texture(); - - Color(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth() -{ - if(!m_en.z && !m_en.f) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, p)]); - - vmulps(ymm1, ymm0, ymm3); - - if(m_en.z) - { - // m_local.d8.p.z = dp8.extract32<2>(); - - vextractps(ptr[&m_local.d8.p.z], xmm1, 2); - } - - if(m_en.f) - { - // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); - - vcvtps2dq(ymm2, ymm1); - vpextrd(ptr[&m_local.d8.p.f], xmm2, 3); - } - - if(m_en.z) - { - // GSVector8 dz = GSVector8(dscan.p).zzzz(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - } - - if(m_en.f) - { - // GSVector8 df = GSVector8(dscan.p).wwww(); - - vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - if(m_en.z) - { - // m_local.d[i].z = dz * shift[1 + i]; - - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); - vmovaps(ptr[&m_local.d[i].z], ymm0); - } - - if(m_en.f) - { - // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); - - if(i < 4) vmulps(ymm0, ymm1, Ymm(4 + i)); - else vmulps(ymm0, ymm1, ptr[&m_shift[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.d[i].f], ymm0); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - if(m_en.f) - { - // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); - - vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - vcvttps2dq(xmm0, xmm0); - vpextrd(ptr[&m_local.p.f], xmm0, 3); - } - - if(m_en.z) - { - // m_local.p.z = vertex[index[1]].t.u32[3]; // 
uint32 z is bypassed in t.w - - mov(eax, ptr[ecx + offsetof(GSVertexSW, t.w)]); - mov(ptr[&m_local.p.z], eax); - } - } -} - -void GSSetupPrimCodeGenerator::Texture() -{ - if(!m_en.t) - { - return; - } - - // GSVector8 dt(dscan.t); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, t)]); - - // GSVector8 dt8 = dt * shift[0]; - - vmulps(ymm1, ymm0, ymm3); - - if(m_sel.fst) - { - // m_local.d8.stq = GSVector8::cast(GSVector8i(dt8)); - - vcvttps2dq(ymm1, ymm1); - - vmovdqa(ptr[&m_local.d8.stq], xmm1); - } - else - { - // m_local.d8.stq = dt8; - - vmovaps(ptr[&m_local.d8.stq], xmm1); - } - - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector8 dstq = dt.xxxx/yyyy/zzzz(); - - vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8 v = dstq * shift[1 + i]; - - if(i < 4) vmulps(ymm2, ymm1, Ymm(4 + i)); - else vmulps(ymm2, ymm1, ptr[&m_shift[i + 1]]); - - if(m_sel.fst) - { - // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v)); - - vcvttps2dq(ymm2, ymm2); - - switch(j) - { - case 0: vmovdqa(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], ymm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch(j) - { - case 0: vmovaps(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], ymm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], ymm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color() -{ - if(!m_en.c) - { - return; - } - - if(m_sel.iip) - { - // GSVector8 dc(dscan.c); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]); - - // m_local.d8.c = GSVector8i(dc * shift[0]).xzyw().ps32(); - - vmulps(ymm1, ymm0, ymm3); - vcvttps2dq(ymm1, ymm1); - vpshufd(ymm1, ymm1, _MM_SHUFFLE(3, 1, 2, 0)); - vpackssdw(ymm1, ymm1); - vmovq(ptr[&m_local.d8.c], xmm1); - - // ymm3 is not needed anymore - - // GSVector8 dr = dc.xxxx(); - // GSVector8 db = dc.zzzz(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); 
- vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); - - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); - - if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].rb = r.upl16(b); - - vpunpcklwd(ymm0, ymm1); - vmovdqa(ptr[&m_local.d[i].rb], ymm0); - } - - // GSVector8 dc(dscan.c); - - vbroadcastf128(ymm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - - // GSVector8 dg = dc.yyyy(); - // GSVector8 da = dc.wwww(); - - vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(1, 1, 1, 1)); - vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); - - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) - { - // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); - - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); - vcvttps2dq(ymm0, ymm0); - vpackssdw(ymm0, ymm0); - - // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); - - if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]); - vcvttps2dq(ymm1, ymm1); - vpackssdw(ymm1, ymm1); - - // m_local.d[i].ga = g.upl16(a); - - vpunpcklwd(ymm0, ymm1); - vmovdqa(ptr[&m_local.d[i].ga], ymm0); - } - } - else - { - // GSVector8i c = GSVector8i(GSVector8(vertex[index[last]].c)); - - int last = 0; - - switch(m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + 
sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - vbroadcasti128(ymm0, ptr[ecx + offsetof(GSVertexSW, c)]); - vcvttps2dq(ymm0, ymm0); - - // c = c.upl16(c.zwxy()); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(1, 0, 3, 2)); - vpunpcklwd(ymm0, ymm1); - - // if(!tme) c = c.srl16(7); - - if(m_sel.tfx == TFX_NONE) - { - vpsrlw(ymm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - vpshufd(ymm1, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); - vpshufd(ymm2, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - - vmovdqa(ptr[&m_local.c.rb], ymm1); - vmovdqa(ptr[&m_local.c.ga], ymm2); - } -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.cpp b/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.cpp deleted file mode 100644 index 008a12a8f5..0000000000 --- a/plugins/GSdx_legacy/GSSetupPrimCodeGenerator.x86.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSSetupPrimCodeGenerator.h" -#include "GSVertexSW.h" - -#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -static const int _args = 0; -static const int _vertex = _args + 4; -static const int _index = _args + 8; -static const int _dscan = _args + 12; - -void GSSetupPrimCodeGenerator::Generate() -{ - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) - { - mov(edx, dword[esp + _dscan]); - - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) - { - movaps(Xmm(3 + i), ptr[&m_shift[i]]); - } - } - - Depth(); - - Texture(); - - Color(); - - ret(); -} - -void GSSetupPrimCodeGenerator::Depth() -{ - if(!m_en.z && !m_en.f) - { - return; - } - - if(m_sel.prim != GS_SPRITE_CLASS) - { - // GSVector4 p = dscan.p; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // GSVector4 df = p.wwww(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - // m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.d4.f], xmm2); - - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.d[i].f], xmm2); - } - } - - if(m_en.z) - { - // GSVector4 dz = p.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - // m_local.d4.z = dz * 4.0f; - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - movdqa(ptr[&m_local.d4.z], xmm1); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // m_local.d[i].z = dz * m_shift[i]; - - movaps(xmm1, xmm0); - mulps(xmm1, Xmm(4 + i)); - movdqa(ptr[&m_local.d[i].z], xmm1); - } - } - } - else - { - // GSVector4 p = vertex[index[1]].p; - - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * 1]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - - movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - - if(m_en.f) - { - // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); - - cvttps2dq(xmm1, xmm0); - pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - movdqa(ptr[&m_local.p.f], xmm1); - } - - if(m_en.z) - { - // uint32 z is bypassed in t.w - - movdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]); - pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - movdqa(ptr[&m_local.p.z], xmm0); - } - } -} - -void GSSetupPrimCodeGenerator::Texture() -{ - if(!m_en.t) - { - return; - } - - // GSVector4 t = dscan.t; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, t)]); - - movaps(xmm1, xmm0); - mulps(xmm1, xmm3); - - if(m_sel.fst) - { - // m_local.d4.stq = GSVector4i(t * 4.0f); - - cvttps2dq(xmm1, xmm1); - - movdqa(ptr[&m_local.d4.stq], xmm1); - } - else - { - // m_local.d4.stq = t * 4.0f; - - movaps(ptr[&m_local.d4.stq], xmm1); - } - - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) - { - // GSVector4 ds = t.xxxx(); - // GSVector4 dt = t.yyyy(); - // GSVector4 dq = t.zzzz(); - - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // GSVector4 v = ds/dt * m_shift[i]; - - movaps(xmm2, xmm1); - mulps(xmm2, Xmm(4 + i)); - - if(m_sel.fst) - { - // m_local.d[i].s/t = GSVector4i(v); - - cvttps2dq(xmm2, xmm2); - - switch(j) - { - case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break; - } - } - else - { - // m_local.d[i].s/t/q = v; - - switch(j) - { - case 0: movaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: movaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: movaps(ptr[&m_local.d[i].q], xmm2); break; - } - } - } - } -} - -void GSSetupPrimCodeGenerator::Color() -{ - if(!m_en.c) - { - return; - } - - if(m_sel.iip) - { - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); - movaps(xmm1, xmm0); - - // m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, xmm3); - cvttps2dq(xmm2, xmm2); - pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0)); - packssdw(xmm2, xmm2); - movdqa(ptr[&m_local.d4.c], xmm2); - - // xmm3 is not needed anymore - - // GSVector4 dr = c.xxxx(); - // GSVector4 db = c.zzzz(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - - for(int i = 0; i < (m_sel.notest ? 
1 : 4); i++) - { - // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i b = GSVector4i(db * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].rb = r.upl16(b); - - punpcklwd(xmm2, xmm3); - movdqa(ptr[&m_local.d[i].rb], xmm2); - } - - // GSVector4 c = dscan.c; - - movaps(xmm0, ptr[edx + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it - movaps(xmm1, xmm0); - - // GSVector4 dg = c.yyyy(); - // GSVector4 da = c.wwww(); - - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) - { - // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); - - movaps(xmm2, xmm0); - mulps(xmm2, Xmm(4 + i)); - cvttps2dq(xmm2, xmm2); - packssdw(xmm2, xmm2); - - // GSVector4i a = GSVector4i(da * m_shift[i]).ps32(); - - movaps(xmm3, xmm1); - mulps(xmm3, Xmm(4 + i)); - cvttps2dq(xmm3, xmm3); - packssdw(xmm3, xmm3); - - // m_local.d[i].ga = g.upl16(a); - - punpcklwd(xmm2, xmm3); - movdqa(ptr[&m_local.d[i].ga], xmm2); - } - } - else - { - // GSVector4i c = GSVector4i(vertex[index[last].c); - - int last = 0; - - switch(m_sel.prim) - { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; - } - - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() - { - mov(ecx, ptr[esp + _index]); - mov(ecx, ptr[ecx + sizeof(uint32) * last]); - shl(ecx, 6); // * sizeof(GSVertexSW) - add(ecx, ptr[esp + _vertex]); - } - - cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); - - // c = c.upl16(c.zwxy()); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2)); - punpcklwd(xmm0, xmm1); - - // if(!tme) c = c.srl16(7); - - if(m_sel.tfx == 
TFX_NONE) - { - psrlw(xmm0, 7); - } - - // m_local.c.rb = c.xxxx(); - // m_local.c.ga = c.zzzz(); - - pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - - movdqa(ptr[&m_local.c.rb], xmm1); - movdqa(ptr[&m_local.c.ga], xmm2); - } -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSShaderOGL.cpp b/plugins/GSdx_legacy/GSShaderOGL.cpp deleted file mode 100644 index c337dfab44..0000000000 --- a/plugins/GSdx_legacy/GSShaderOGL.cpp +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSShaderOGL.h" -#include "GLState.h" - -GSShaderOGL::GSShaderOGL(bool debug) : - m_pipeline(0), - m_debug_shader(debug) -{ - m_single_prog.clear(); - if (GLLoader::found_GL_ARB_separate_shader_objects) { - glGenProgramPipelines(1, &m_pipeline); - glBindProgramPipeline(m_pipeline); - } -} - -GSShaderOGL::~GSShaderOGL() -{ - if (GLLoader::found_GL_ARB_separate_shader_objects) - glDeleteProgramPipelines(1, &m_pipeline); - - for (auto it = m_single_prog.begin(); it != m_single_prog.end() ; it++) glDeleteProgram(it->second); - m_single_prog.clear(); -} - -void GSShaderOGL::VS(GLuint s) -{ - if (GLState::vs != s) - { - GLState::vs = s; - GLState::dirty_prog = true; - if (GLLoader::found_GL_ARB_separate_shader_objects) - glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s); - } -} - -void GSShaderOGL::PS(GLuint s) -{ -#ifdef _DEBUG - if (true) -#else - if (GLState::ps != s) -#endif - { - // In debug always sets the program. It allow to replace the program in apitrace easily. 
- GLState::ps = s; - GLState::dirty_prog = true; - if (GLLoader::found_GL_ARB_separate_shader_objects) { - glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s); - } - } -} - -void GSShaderOGL::GS(GLuint s) -{ - if (GLState::gs != s) - { - GLState::gs = s; - GLState::dirty_prog = true; - if (GLLoader::found_GL_ARB_separate_shader_objects) - glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, s); - } -} - -bool GSShaderOGL::ValidateShader(GLuint s) -{ - if (!m_debug_shader) return true; - - GLint status = 0; - glGetShaderiv(s, GL_COMPILE_STATUS, &status); - if (status) return true; - - GLint log_length = 0; - glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { - char* log = new char[log_length]; - glGetShaderInfoLog(s, log_length, NULL, log); - fprintf(stderr, "%s", log); - delete[] log; - } - fprintf(stderr, "\n"); - - return false; -} - -bool GSShaderOGL::ValidateProgram(GLuint p) -{ - if (!m_debug_shader) return true; - - GLint status = 0; - glGetProgramiv(p, GL_LINK_STATUS, &status); - if (status) return true; - - GLint log_length = 0; - glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { - char* log = new char[log_length]; - glGetProgramInfoLog(p, log_length, NULL, log); - fprintf(stderr, "%s", log); - delete[] log; - } - fprintf(stderr, "\n"); - - return false; -} - -bool GSShaderOGL::ValidatePipeline(GLuint p) -{ - if (!m_debug_shader) return true; - - // FIXME: might be mandatory to validate the pipeline - glValidateProgramPipeline(p); - - GLint status = 0; - glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status); - if (status) return true; - - GLint log_length = 0; - glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { - char* log = new char[log_length]; - glGetProgramPipelineInfoLog(p, log_length, NULL, log); - fprintf(stderr, "%s", log); - delete[] log; - } - fprintf(stderr, "\n"); - - return false; -} - -GLuint GSShaderOGL::LinkNewProgram() -{ - GLuint p = 
glCreateProgram(); - if (GLState::vs) glAttachShader(p, GLState::vs); - if (GLState::ps) glAttachShader(p, GLState::ps); - if (GLState::gs) glAttachShader(p, GLState::gs); - - glLinkProgram(p); - - ValidateProgram(p); - - return p; -} - -void GSShaderOGL::UseProgram() -{ - if (GLState::dirty_prog) { - if (!GLLoader::found_GL_ARB_separate_shader_objects) { - hash_map::iterator it; - // Note: shader are integer lookup pointer. They start from 1 and incr - // every time you create a new shader OR a new program. - // Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. GS has only 2 programs - // We migth be able to pack the value in a 32bits int - // I would need to check the behavior on Nvidia (pause/resume). - uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps; - it = m_single_prog.find(sel); - if (it == m_single_prog.end()) { - GLState::program = LinkNewProgram(); - m_single_prog[sel] = GLState::program; - - ValidateProgram(GLState::program); - - glUseProgram(GLState::program); - } else { - GLuint prog = it->second; - if (prog != GLState::program) { - GLState::program = prog; - glUseProgram(GLState::program); - } - } - } - } - - GLState::dirty_prog = false; -} - -std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro) -{ - std::string header; - header = "#version 330 core\n"; - // Need GL version 420 - header += "#extension GL_ARB_shading_language_420pack: require\n"; - if (GLLoader::found_GL_ARB_separate_shader_objects) { - // Need GL version 410 - header += "#extension GL_ARB_separate_shader_objects: require\n"; - } - if (GLLoader::found_GL_ARB_shader_image_load_store) { - // Need GL version 420 - header += "#extension GL_ARB_shader_image_load_store: require\n"; - } else { - header += "#define DISABLE_GL42_image\n"; - } - if (GLLoader::found_GL_ARB_clip_control) { - header += "#define ZERO_TO_ONE_DEPTH\n"; - } - - // Stupid GL implementation (can't use GL_ES) - 
// AMD/nvidia define it to 0 - // intel window don't define it - // intel linux refuse to define it - header += "#define pGL_ES 0\n"; - - // Allow to puts several shader in 1 files - switch (type) { - case GL_VERTEX_SHADER: - header += "#define VERTEX_SHADER 1\n"; - break; - case GL_GEOMETRY_SHADER: - header += "#define GEOMETRY_SHADER 1\n"; - break; - case GL_FRAGMENT_SHADER: - header += "#define FRAGMENT_SHADER 1\n"; - break; - default: ASSERT(0); - } - - // Select the entry point ie the main function - header += format("#define %s main\n", entry.c_str()); - - header += macro; - - return header; -} - -GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel) -{ - ASSERT(glsl_h_code != NULL); - - GLuint program = 0; - - if (type == GL_GEOMETRY_SHADER && !GLLoader::found_geometry_shader) { - return program; - } - - // Note it is better to separate header and source file to have the good line number - // in the glsl compiler report - const char* sources[2]; - - std::string header = GenGlslHeader(entry, type, macro_sel); - int shader_nb = 1; -#if 1 - sources[0] = header.c_str(); - sources[1] = glsl_h_code; - shader_nb++; -#else - sources[0] = header.append(glsl_h_code).c_str(); -#endif - - if (GLLoader::found_GL_ARB_separate_shader_objects) { - program = glCreateShaderProgramv(type, shader_nb, sources); - } else { - program = glCreateShader(type); - glShaderSource(program, shader_nb, sources, NULL); - glCompileShader(program); - } - - bool status; - if (GLLoader::found_GL_ARB_separate_shader_objects) - status = ValidateProgram(program); - else - status = ValidateShader(program); - - if (!status) { - // print extra info - fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), program); - fprintf(stderr, "\n%s", macro_sel.c_str()); - fprintf(stderr, "\n"); - } - return program; -} - -// This function will get the binary program. 
Normally it must be used a caching -// solution but Nvidia also incorporates the ASM dump. Asm is nice because it allow -// to have an overview of the program performance based on the instruction number -// Note: initially I was using cg offline compiler but it doesn't support latest -// GLSL improvement (unfortunately). -int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) -{ - if (!GLLoader::nvidia_buggy_driver) return 0; - - GLint binaryLength; - glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength); - - char* binary = new char[binaryLength+4]; - GLenum binaryFormat; - glGetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary); - - FILE* outfile = fopen(file.c_str(), "w"); - ASSERT(outfile); - - // Search the magic number "!!" - int asm_ = 0; - while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) { - asm_ += 1; - } - - int instructions = -1; - if (asm_ < binaryLength) { - // Now print asm as text - char* asm_txt = strtok(&binary[asm_], "\n"); - while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) { - if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4)) { - instructions = 0; - } else if (instructions >= 0) { - if (instructions == 0) - fprintf(outfile, "\n"); - instructions++; - } - - fprintf(outfile, "%s\n", asm_txt); - asm_txt = strtok(NULL, "\n"); - } - fprintf(outfile, "\nFound %d instructions\n", instructions); - } - fclose(outfile); - - if (instructions < 0) { - // RAW dump in case of error - fprintf(stderr, "Error: failed to find the number of instructions!\n"); - outfile = fopen(file.c_str(), "wb"); - fwrite(binary, binaryLength, 1, outfile); - fclose(outfile); - ASSERT(0); - } - - delete[] binary; - - return instructions; -} - -void GSShaderOGL::Delete(GLuint s) -{ - if (GLLoader::found_GL_ARB_separate_shader_objects) { - glDeleteProgram(s); - } else { - glDeleteShader(s); - } -} diff --git a/plugins/GSdx_legacy/GSShaderOGL.h 
b/plugins/GSdx_legacy/GSShaderOGL.h deleted file mode 100644 index ef00002208..0000000000 --- a/plugins/GSdx_legacy/GSShaderOGL.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -class GSShaderOGL { - GLuint m_pipeline; - hash_map m_single_prog; - const bool m_debug_shader; - - bool ValidateShader(GLuint p); - bool ValidateProgram(GLuint p); - bool ValidatePipeline(GLuint p); - - std::string GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro); - GLuint LinkNewProgram(); - - public: - GSShaderOGL(bool debug); - ~GSShaderOGL(); - - void GS(GLuint s); - void PS(GLuint s); - void VS(GLuint s); - - void UseProgram(); - - GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = ""); - - int DumpAsm(const std::string& file, GLuint p); - - void Delete(GLuint s); -}; diff --git a/plugins/GSdx_legacy/GSState.cpp b/plugins/GSdx_legacy/GSState.cpp deleted file mode 100644 index 99f6ca5c40..0000000000 --- a/plugins/GSdx_legacy/GSState.cpp +++ /dev/null @@ -1,5635 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - 
* http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSState.h" -#include "GSdx.h" - -//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering - -static int s_crc_hack_level = 3; - -GSState::GSState() - : m_version(6) - , m_mt(false) - , m_irq(NULL) - , m_path3hack(0) - , m_init_read_fifo_supported(false) - , m_q(1.0f) - , m_texflush(true) - , m_vt(this) - , m_regs(NULL) - , m_crc(0) - , m_options(0) - , m_frameskip(0) - , m_crcinited(false) -{ - m_nativeres = theApp.GetConfig("upscale_multiplier",1) == 1; - m_mipmap = !!theApp.GetConfig("mipmap", 1); - - s_n = 0; - s_dump = !!theApp.GetConfig("dump", 0); - s_save = !!theApp.GetConfig("save", 0); - s_savet = !!theApp.GetConfig("savet", 0); - s_savez = !!theApp.GetConfig("savez", 0); - s_savef = !!theApp.GetConfig("savef", 0); - s_saven = theApp.GetConfig("saven", 0); - s_savel = theApp.GetConfig("savel", 5000); -#ifdef __linux__ - if (s_dump) { - GSmkdir("/tmp/GS_HW_dump"); - GSmkdir("/tmp/GS_SW_dump"); - } -#endif - - //s_dump = 1; - //s_save = 1; - //s_savez = 1; - //s_savet = 1; - //s_savef = 1; - //s_saven = 0; - //s_savel = 0; - - UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? 
theApp.GetConfig("UserHacks_WildHack", 0) : 0; - m_crc_hack_level = theApp.GetConfig("crc_hack_level", 3); - s_crc_hack_level = m_crc_hack_level; - - memset(&m_v, 0, sizeof(m_v)); - memset(&m_vertex, 0, sizeof(m_vertex)); - memset(&m_index, 0, sizeof(m_index)); - - m_v.RGBAQ.Q = 1.0f; - - GrowVertexBuffer(); - - m_sssize = 0; - - m_sssize += sizeof(m_version); - m_sssize += sizeof(m_env.PRIM); - m_sssize += sizeof(m_env.PRMODE); - m_sssize += sizeof(m_env.PRMODECONT); - m_sssize += sizeof(m_env.TEXCLUT); - m_sssize += sizeof(m_env.SCANMSK); - m_sssize += sizeof(m_env.TEXA); - m_sssize += sizeof(m_env.FOGCOL); - m_sssize += sizeof(m_env.DIMX); - m_sssize += sizeof(m_env.DTHE); - m_sssize += sizeof(m_env.COLCLAMP); - m_sssize += sizeof(m_env.PABE); - m_sssize += sizeof(m_env.BITBLTBUF); - m_sssize += sizeof(m_env.TRXDIR); - m_sssize += sizeof(m_env.TRXPOS); - m_sssize += sizeof(m_env.TRXREG); - m_sssize += sizeof(m_env.TRXREG); // obsolete - - for(int i = 0; i < 2; i++) - { - m_sssize += sizeof(m_env.CTXT[i].XYOFFSET); - m_sssize += sizeof(m_env.CTXT[i].TEX0); - m_sssize += sizeof(m_env.CTXT[i].TEX1); - m_sssize += sizeof(m_env.CTXT[i].TEX2); - m_sssize += sizeof(m_env.CTXT[i].CLAMP); - m_sssize += sizeof(m_env.CTXT[i].MIPTBP1); - m_sssize += sizeof(m_env.CTXT[i].MIPTBP2); - m_sssize += sizeof(m_env.CTXT[i].SCISSOR); - m_sssize += sizeof(m_env.CTXT[i].ALPHA); - m_sssize += sizeof(m_env.CTXT[i].TEST); - m_sssize += sizeof(m_env.CTXT[i].FBA); - m_sssize += sizeof(m_env.CTXT[i].FRAME); - m_sssize += sizeof(m_env.CTXT[i].ZBUF); - } - - m_sssize += sizeof(m_v.RGBAQ); - m_sssize += sizeof(m_v.ST); - m_sssize += sizeof(m_v.UV); - m_sssize += sizeof(m_v.FOG); - m_sssize += sizeof(m_v.XYZ); - m_sssize += sizeof(GIFReg); // obsolete - - m_sssize += sizeof(m_tr.x); - m_sssize += sizeof(m_tr.y); - m_sssize += m_mem.m_vmsize; - m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path); - m_sssize += sizeof(m_q); - - PRIM = &m_env.PRIM; -// CSR->rREV = 0x20; - 
m_env.PRMODECONT.AC = 1; - - Reset(); - - ResetHandlers(); -} - -GSState::~GSState() -{ - if(m_vertex.buff) _aligned_free(m_vertex.buff); - if(m_index.buff) _aligned_free(m_index.buff); -} - -void GSState::SetRegsMem(uint8* basemem) -{ - ASSERT(basemem); - - m_regs = (GSPrivRegSet*)basemem; -} - -void GSState::SetIrqCallback(void (*irq)()) -{ - m_irq = irq; -} - -void GSState::SetMultithreaded(bool mt) -{ - // Some older versions of PCSX2 didn't properly set the irq callback to NULL - // in multithreaded mode (possibly because ZeroGS itself would assert in such - // cases), and didn't bind them to a dummy callback either. PCSX2 handles all - // IRQs internally when multithreaded anyway -- so let's ignore them here: - - m_mt = mt; - - if(mt) - { - m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull; - m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull; - m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull; - } - else - { - m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL; - m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH; - m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL; - } -} - -void GSState::SetFrameSkip(int skip) -{ - if(m_frameskip == skip) return; - - m_frameskip = skip; - - if(skip) - { - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP; - - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP; - - m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = 
&GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = &GSState::GIFPackedRegHandlerNOP; - } - else - { - UpdateVertexKick(); - } -} - -void GSState::Reset() -{ - //printf("GSdx info: GS reset\n"); - - // FIXME: memset(m_mem.m_vm8, 0, m_mem.m_vmsize); // bios logo not shown cut in half after reset, missing graphics in GoW after first FMV - memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path)); - memset(&m_v, 0, sizeof(m_v)); - -// PRIM = &m_env.PRIM; -// m_env.PRMODECONT.AC = 1; - - m_env.Reset(); - - PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; - - UpdateContext(); - - UpdateVertexKick(); - - m_env.UpdateDIMX(); - - for(size_t i = 0; i < 2; i++) - { - m_env.CTXT[i].UpdateScissor(); - - m_env.CTXT[i].offset.fb = m_mem.GetOffset(m_env.CTXT[i].FRAME.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].FRAME.PSM); - m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].ZBUF.PSM); - m_env.CTXT[i].offset.tex = m_mem.GetOffset(m_env.CTXT[i].TEX0.TBP0, m_env.CTXT[i].TEX0.TBW, m_env.CTXT[i].TEX0.PSM); - m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF); - m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF); - } - - UpdateScissor(); - - m_vertex.head = 0; - m_vertex.tail = 0; - m_vertex.next = 0; - m_index.tail = 0; - - m_texflush = true; -} - -void GSState::ResetHandlers() -{ - for(size_t i = 0; i < countof(m_fpGIFPackedRegHandlers); i++) - { - m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull; - } - - m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerPRIM; - m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; - m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; - m_fpGIFPackedRegHandlers[GIF_REG_UV] = !UserHacks_WildHack ? 
&GSState::GIFPackedRegHandlerUV : &GSState::GIFPackedRegHandlerUV_Hack; - m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>; - m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; - m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D; - m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP; - - #define SetHandlerXYZ(P) \ - m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2; \ - m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2; \ - m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2; \ - m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2; \ - m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2; \ - m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2; \ - m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2; \ - m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2; \ - m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2

; \ - m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2

; \ - - SetHandlerXYZ(GS_POINTLIST); - SetHandlerXYZ(GS_LINELIST); - SetHandlerXYZ(GS_LINESTRIP); - SetHandlerXYZ(GS_TRIANGLELIST); - SetHandlerXYZ(GS_TRIANGLESTRIP); - SetHandlerXYZ(GS_TRIANGLEFAN); - SetHandlerXYZ(GS_SPRITE); - SetHandlerXYZ(GS_INVALID); - - for(size_t i = 0; i < countof(m_fpGIFRegHandlers); i++) - { - m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull; - } - - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = !UserHacks_WildHack ? &GSState::GIFRegHandlerUV : &GSState::GIFRegHandlerUV_Hack; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG; - m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT; - m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = 
&GSState::GIFRegHandlerMIPTBP1<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA; - m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH; - m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX; - m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE; - m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP; - m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE; - m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = 
&GSState::GIFRegHandlerTRXDIR; - m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG; - - SetMultithreaded(m_mt); -} - -GSVector4i GSState::GetDisplayRect(int i) -{ - if(i < 0) i = IsEnabled(1) ? 1 : 0; - int height = (m_regs->DISP[i].DISPLAY.DH + 1) / (m_regs->DISP[i].DISPLAY.MAGV + 1); - int width = (m_regs->DISP[i].DISPLAY.DW + 1) / (m_regs->DISP[i].DISPLAY.MAGH + 1); - GSVector4i r; - - //Some games (such as Pool Paradise) use alternate line reading and provide a massive height which is really half. - if (height > 640 && !Vmode_VESA_DTV) - { - height /= 2; - } - - r.left = m_regs->DISP[i].DISPLAY.DX / (m_regs->DISP[i].DISPLAY.MAGH + 1); - r.top = m_regs->DISP[i].DISPLAY.DY / (m_regs->DISP[i].DISPLAY.MAGV + 1); - r.right = r.left + width; - r.bottom = r.top + height; - - // Useful for debugging games: - //printf("DW: %d , DH: %d , left: %d , right: %d , top: %d , down: %d , MAGH: %d , MAGV: %d\n", m_regs->DISP[i].DISPLAY.DW, m_regs->DISP[i].DISPLAY.DH, r.left, r.right, r.top, r.bottom , m_regs->DISP[i].DISPLAY.MAGH,m_regs->DISP[i].DISPLAY.MAGV); - - return r; -} - -GSVector4i GSState::GetFrameRect(int i) -{ - if (i < 0) i = IsEnabled(1) ? 1 : 0; - - GSVector4i r = GetDisplayRect(i); - - int w = r.width(); - int h = r.height(); - -// NTSC: Saturate higher height values for games which have CRTC width lower than 640. -// Some NTSC mode games request higher height values for accurate display size / position when width is 640 -// Testcases : PS logo (640x512) , Resident Evil:CVX (640x480). potentially more test cases... 
- - if (Vmode_NTSC && h > 448 && w < 640) - h = 448; - - if (m_regs->SMODE2.INT && m_regs->SMODE2.FFMD && h > 1) - h >>= 1; - - r.left = m_regs->DISP[i].DISPFB.DBX; - r.top = m_regs->DISP[i].DISPFB.DBY; - r.right = r.left + w; - r.bottom = r.top + h; - - /*static GSVector4i old_r = (GSVector4i) 0; - if ((old_r.left != r.left) || (old_r.right != r.right) || (old_r.top != r.top) || (old_r.right != r.right)){ - printf("w %d h %d left %d top %d right %d bottom %d\n",w,h,r.left,r.top,r.right,r.bottom); - } - old_r = r;*/ - - return r; -} - -GSVector2i GSState::GetDeviceSize(int i) -{ - // TODO: return (m_regs->SMODE1.CMOD & 1) ? GSVector2i(640, 576) : GSVector2i(640, 480); - - // TODO: other params of SMODE1 should affect the true device display size - - // TODO2: pal games at 60Hz - - if(i < 0) i = IsEnabled(1) ? 1 : 0; - - GSVector4i r = GetDisplayRect(i); - - int w = r.width(); - int h = r.height(); - - /*if(h == 2 * 416 || h == 2 * 448 || h == 2 * 512) - { - h /= 2; - } - else - { - h = (m_regs->SMODE1.CMOD & 1) ? 
512 : 448; - }*/ - - //Fixme : Just slightly better than the hack above - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD && h > 1) - { - if (IsEnabled(0) || IsEnabled(1)) - { - h >>= 1; - } - } - - //Fixme: These games elude the code above, worked with the old hack - else if(m_game.title == CRC::SilentHill2 || m_game.title == CRC::SilentHill3) - { - h /= 2; - } - - return GSVector2i(w, h); - -} - -bool GSState::IsEnabled(int i) -{ - ASSERT(i >= 0 && i < 2); - - if(i == 0 && m_regs->PMODE.EN1) - { - return m_regs->DISP[0].DISPLAY.DW || m_regs->DISP[0].DISPLAY.DH; - } - else if(i == 1 && m_regs->PMODE.EN2) - { - return m_regs->DISP[1].DISPLAY.DW || m_regs->DISP[1].DISPLAY.DH; - } - - return false; -} - -float GSState::GetTvRefreshRate() -{ - float vertical_frequency = 0; - - switch (m_regs->SMODE1.CMOD) - { - case 0: - { - if (Vmode_VESA_1A) vertical_frequency = 59.94f; - if (Vmode_VESA_1C) vertical_frequency = 75; - if (Vmode_VESA_2B) vertical_frequency = 60.317f; - if (Vmode_VESA_2D) vertical_frequency = 75; - if (Vmode_VESA_3B) vertical_frequency = 60.004f; - if (Vmode_VESA_3D) vertical_frequency = 75.029f; - if (Vmode_VESA_4A) vertical_frequency = 60.020f; - if (Vmode_VESA_4B) vertical_frequency = 75.025f; - if (Vmode_DTV_480P) vertical_frequency = 59.94f; - if (Vmode_DTV_720P_1080I) vertical_frequency = 60; - break; - } - - case 2: vertical_frequency = (60 / 1.001f); //NTSC - break; - case 3: vertical_frequency = 50; //PAL - break; - default: ASSERT(0); - } - - return vertical_frequency; -} - -// GIFPackedRegHandler* - -void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r) -{ - // ASSERT(0); -} - -void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r) -{ - #if _M_SSE >= 0x301 - - GSVector4i mask = GSVector4i::load(0x0c080400); - GSVector4i v = GSVector4i::load(r).shuffle8(mask); - - m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v); - - #else - - GSVector4i v = GSVector4i::load(r) & GSVector4i::x000000ff(); - - m_v.RGBAQ.u32[0] = 
v.rgba32(); - - #endif - - m_v.RGBAQ.Q = m_q; -} - -void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r) -{ - GSVector4i st = GSVector4i::loadl(&r->u64[0]); - GSVector4i q = GSVector4i::loadl(&r->u64[1]); - - GSVector4i::storel(&m_v.ST, st); - - q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // character shadow in Vexx, q = 0 (st also 0 on the first 16 vertices), setting it to 1.0f to avoid div by zero later - - *(int*)&m_q = GSVector4i::store(q); - - ASSERT(!std::isnan(m_q)); // See GIFRegHandlerRGBAQ - ASSERT(!std::isnan(m_v.ST.S)); // See GIFRegHandlerRGBAQ - ASSERT(!std::isnan(m_v.ST.T)); // See GIFRegHandlerRGBAQ - -#ifdef Offset_ST - GIFRegTEX0 TEX0 = m_context->TEX0; - m_v.ST.S -= 0.02f * m_q / (1 << TEX0.TW); - m_v.ST.T -= 0.02f * m_q / (1 << TEX0.TH); -#endif -} - -void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r) -{ - GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff(); - - m_v.UV = (uint32)GSVector4i::store(v.ps32(v)); -} - -void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r) -{ - GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff(); - - m_v.UV = (uint32)GSVector4i::store(v.ps32(v)); - - isPackedUV_HackFlag = true; -} - -template -void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) -{ - /* - m_v.XYZ.X = r->XYZF2.X; - m_v.XYZ.Y = r->XYZF2.Y; - m_v.XYZ.Z = r->XYZF2.Z; - m_v.FOG = r->XYZF2.F; - */ - GSVector4i xy = GSVector4i::loadl(&r->u64[0]); - GSVector4i zf = GSVector4i::loadl(&r->u64[1]); - xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); - zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); - - m_v.m[1] = xy.upl32(zf); - - VertexKick(adc ? 
1 : r->XYZF2.Skip()); -} - -template -void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) -{ -/* - m_v.XYZ.X = r->XYZ2.X; - m_v.XYZ.Y = r->XYZ2.Y; - m_v.XYZ.Z = r->XYZ2.Z; -*/ - GSVector4i xy = GSVector4i::loadl(&r->u64[0]); - GSVector4i z = GSVector4i::loadl(&r->u64[1]); - GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); - - m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); - - VertexKick(adc ? 1 : r->XYZ2.Skip()); -} - -void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r) -{ - m_v.FOG = r->FOG.F; -} - -void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r) -{ - (this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r); -} - -void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r) -{ -} - -template -void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size) -{ - ASSERT(size > 0 && size % 3 == 0); - - const GIFPackedReg* RESTRICT r_end = r + size; - - while(r < r_end) - { - GSVector4i st = GSVector4i::loadl(&r[0].u64[0]); - GSVector4i q = GSVector4i::loadl(&r[0].u64[1]); - GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16(); - /* - GSVector4i rg = GSVector4i::loadl(&r[1].u64[0]); - GSVector4i ba = GSVector4i::loadl(&r[1].u64[1]); - GSVector4i rbga = rg.upl8(ba); - GSVector4i rgba = rbga.upl8(rbga.zzzz()); - */ - q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ - - m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one - - GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]); - GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]); - xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV)); - zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); - - m_v.m[1] = xy.upl32(zf); // TODO: only store the last one - - VertexKick(r[2].XYZF2.Skip()); - - r += 3; - } - - m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time -} - -template -void 
GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size) -{ - ASSERT(size > 0 && size % 3 == 0); - - const GIFPackedReg* RESTRICT r_end = r + size; - - while(r < r_end) - { - GSVector4i st = GSVector4i::loadl(&r[0].u64[0]); - GSVector4i q = GSVector4i::loadl(&r[0].u64[1]); - GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16(); - /* - GSVector4i rg = GSVector4i::loadl(&r[1].u64[0]); - GSVector4i ba = GSVector4i::loadl(&r[1].u64[1]); - GSVector4i rbga = rg.upl8(ba); - GSVector4i rgba = rbga.upl8(rbga.zzzz()); - */ - q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ - - m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one - - GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]); - GSVector4i z = GSVector4i::loadl(&r[2].u64[1]); - GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z); - - m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one - - VertexKick(r[2].XYZ2.Skip()); - - r += 3; - } - - m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time -} - -void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size) -{ -} - -// GIFRegHandler* - -void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r) -{ - // ASSERT(0); -} - -__forceinline void GSState::ApplyPRIM(uint32 prim) -{ - // ASSERT(r->PRIM.PRIM < 7); - - if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists - { - if((m_env.PRIM.u32[0] ^ prim) & 0x7f8) // all fields except PRIM - { - Flush(); - } - } - else - { - Flush(); - } - - m_env.PRIM.u32[0] = prim; - m_env.PRMODE._PRIM = prim; - - UpdateContext(); - - UpdateVertexKick(); - - ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next); - - if(m_index.tail == 0) - { - m_vertex.next = 0; - } - - m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices 
from the end of the vertex buffer -} - -void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r) -{ - ALIGN_STACK(32); - - ApplyPRIM(r->PRIM.u32[0]); -} - -void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r) -{ - GSVector4i rgbaq = (GSVector4i)r->RGBAQ; - - GSVector4i q = rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy(); // see GIFPackedRegHandlerSTQ - - // Silent Hill output a nan in Q to emulate the flash light. Unfortunately it - // breaks GSVertexTrace code that rely on min/max. - - q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max)); - - m_v.RGBAQ = rgbaq.upl32(q); - - /* - // Silent Hill output a nan in Q to emulate the flash light. Unfortunately it - // breaks GSVertexTrace code that rely on min/max. - if (std::isnan(m_v.RGBAQ.Q)) - { - m_v.RGBAQ.Q = std::numeric_limits::max(); - } - */ -} - -void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r) -{ - m_v.ST = (GSVector4i)r->ST; - - ASSERT(!std::isnan(m_v.ST.S)); // See GIFRegHandlerRGBAQ - ASSERT(!std::isnan(m_v.ST.T)); // See GIFRegHandlerRGBAQ - -#ifdef Offset_ST - GIFRegTEX0 TEX0 = m_context->TEX0; - m_v.ST.S -= 0.02f * m_q / (1 << TEX0.TW); - m_v.ST.T -= 0.02f * m_q / (1 << TEX0.TH); -#endif -} - -void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r) -{ - m_v.UV = r->UV.u32[0] & 0x3fff3fff; -} - -void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r) -{ - m_v.UV = r->UV.u32[0] & 0x3fff3fff; - - isPackedUV_HackFlag = false; -} - -template -void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r) -{ -/* - m_v.XYZ.X = r->XYZF.X; - m_v.XYZ.Y = r->XYZF.Y; - m_v.XYZ.Z = r->XYZF.Z; - m_v.FOG.F = r->XYZF.F; -*/ - -/* - m_v.XYZ.u32[0] = r->XYZF.u32[0]; - m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff; - m_v.FOG = r->XYZF.u32[1] >> 24; -*/ - - GSVector4i xyzf = GSVector4i::loadl(&r->XYZF); - GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff())); - GSVector4i uvf = 
GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>()); - - m_v.m[1] = xyz.upl64(uvf); - - VertexKick(adc); -} - -template -void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r) -{ - // m_v.XYZ = (GSVector4i)r->XYZ; - - m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV); - - VertexKick(adc); -} - -template void GSState::ApplyTEX0(GIFRegTEX0& TEX0) -{ - // even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing - - bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT); - - // clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this) - - uint64 mask = 0x1f78001c3fffffffull; // TBP0 TBW PSM TW TCC TFX CPSM CSA - - if(wt || PRIM->CTXT == i && ((TEX0.u64 ^ m_env.CTXT[i].TEX0.u64) & mask)) - { - Flush(); - } - - TEX0.CPSM &= 0xa; // 1010b - - if((TEX0.u32[0] ^ m_env.CTXT[i].TEX0.u32[0]) & 0x3ffffff) // TBP0 TBW PSM - { - m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - } - - m_env.CTXT[i].TEX0 = (GSVector4i)TEX0; - - if(wt) - { - GIFRegBITBLTBUF BITBLTBUF; - GSVector4i r; - - if(TEX0.CSM == 0) - { - BITBLTBUF.SBP = TEX0.CBP; - BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = TEX0.CSM; - - r.left = 0; - r.top = 0; - r.right = GSLocalMemory::m_psm[TEX0.CPSM].bs.x; - r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].bs.y; - - int blocks = 4; - - if(GSLocalMemory::m_psm[TEX0.CPSM].bpp == 16) - { - blocks >>= 1; - } - - if(GSLocalMemory::m_psm[TEX0.PSM].bpp == 4) - { - blocks >>= 1; - } - - for(int j = 0; j < blocks; j++, BITBLTBUF.SBP++) - { - InvalidateLocalMem(BITBLTBUF, r, true); - } - } - else - { - BITBLTBUF.SBP = TEX0.CBP; - BITBLTBUF.SBW = m_env.TEXCLUT.CBW; - BITBLTBUF.SPSM = TEX0.CSM; - - r.left = m_env.TEXCLUT.COU; - r.top = m_env.TEXCLUT.COV; - r.right = r.left + GSLocalMemory::m_psm[TEX0.CPSM].pal; - r.bottom = r.top + 1; - - InvalidateLocalMem(BITBLTBUF, r, true); - } - - 
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT); - } -} - -template void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r) -{ - GIFRegTEX0 TEX0 = r->TEX0; - - int tw = (int)TEX0.TW; - int th = (int)TEX0.TH; - - if(tw > 10) tw = 10; - if(th > 10) th = 10; - - if(PRIM->FST) - { - // Tokyo Xtreme Racer Drift 2, TW/TH == 0 - // Just setting the max texture size to make the texture cache allocate some surface. - // The vertex trace will narrow the updated area down to the minimum, upper-left 8x8 - // for a single letter, but it may address the whole thing if it wants to. - - if(tw == 0) tw = 10; - if(th == 0) th = 10; - } - else - { - // Yakuza, TW/TH == 0 - // The minimap is drawn using solid colors, the texture is really a 1x1 white texel, - // modulated by the vertex color. Cannot change the dimension because S/T are normalized. - } - - TEX0.TW = tw; - TEX0.TH = th; - - if((TEX0.TBW & 1) && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4)) - { - ASSERT(TEX0.TBW == 1); // TODO // Bouken Jidai Katsugeki Goemon - - TEX0.TBW &= ~1; // GS User 2.6 - } - - ApplyTEX0(TEX0); - - if(m_env.CTXT[i].TEX1.MTBA) - { - // NOTE 1: TEX1.MXL must not be automatically set to 3 here. - // NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. 
(common for PSM_PSMT4) - // NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width) - - uint32 bp = TEX0.TBP0; - uint32 bw = TEX0.TBW; - uint32 w = 1u << TEX0.TW; - uint32 h = 1u << TEX0.TH; - uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp; - - if(h < w) h = w; - - bp += ((w * h * bpp >> 3) + 255) >> 8; - bw = std::max(bw >> 1, 1); - w = std::max(w >> 1, 1); - h = std::max(h >> 1, 1); - - m_env.CTXT[i].MIPTBP1.TBP1 = bp; - m_env.CTXT[i].MIPTBP1.TBW1 = bw; - - bp += ((w * h * bpp >> 3) + 255) >> 8; - bw = std::max(bw >> 1, 1); - w = std::max(w >> 1, 1); - h = std::max(h >> 1, 1); - - m_env.CTXT[i].MIPTBP1.TBP2 = bp; - m_env.CTXT[i].MIPTBP1.TBW2 = bw; - - bp += ((w * h * bpp >> 3) + 255) >> 8; - bw = std::max(bw >> 1, 1); - w = std::max(w >> 1, 1); - h = std::max(h >> 1, 1); - - m_env.CTXT[i].MIPTBP1.TBP3 = bp; - m_env.CTXT[i].MIPTBP1.TBW3 = bw; - - // printf("MTBA\n"); - } -} - -template void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP) - { - Flush(); - } - - m_env.CTXT[i].CLAMP = (GSVector4i)r->CLAMP; -} - -void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r) -{ - m_v.FOG = r->FOG.F; -} - -void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r) -{ -} - -template void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1) - { - Flush(); - } - - m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1; -} - -template void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r) -{ - // m_env.CTXT[i].TEX2 = r->TEX2; // not used - - // TEX2 is a masked write to TEX0, for performing CLUT swaps (palette swaps). - // It only applies the following fields: - // CLD, CSA, CSM, CPSM, CBP, PSM. 
- // It ignores these fields (uses existing values in the context): - // TFX, TCC, TH, TW, TBW, and TBP0 - - uint64 mask = 0xFFFFFFE003F00000ull; // TEX2 bits - - GIFRegTEX0 TEX0; - - TEX0.u64 = (m_env.CTXT[i].TEX0.u64 & ~mask) | (r->u64 & mask); - - ApplyTEX0(TEX0); -} - -template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r) -{ - GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff(); - - if(!o.eq(m_env.CTXT[i].XYOFFSET)) - { - Flush(); - } - - m_env.CTXT[i].XYOFFSET = o; - - m_env.CTXT[i].UpdateScissor(); - - UpdateScissor(); -} - -void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r) -{ - if(r->PRMODECONT != m_env.PRMODECONT) - { - Flush(); - } - - m_env.PRMODECONT.AC = r->PRMODECONT.AC; - - PRIM = m_env.PRMODECONT.AC ? &m_env.PRIM : (GIFRegPRIM*)&m_env.PRMODE; - - // if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n"); - - UpdateContext(); - - UpdateVertexKick(); -} - -void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r) -{ - if(!m_env.PRMODECONT.AC) - { - Flush(); - } - - uint32 _PRIM = m_env.PRMODE._PRIM; - m_env.PRMODE = (GSVector4i)r->PRMODE; - m_env.PRMODE._PRIM = _PRIM; - - UpdateContext(); - - UpdateVertexKick(); -} - -void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r) -{ - if(r->TEXCLUT != m_env.TEXCLUT) - { - Flush(); - } - - m_env.TEXCLUT = (GSVector4i)r->TEXCLUT; -} - -void GSState::GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r) -{ - if(r->SCANMSK != m_env.SCANMSK) - { - Flush(); - } - - m_env.SCANMSK = (GSVector4i)r->SCANMSK; -} - -template void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1) - { - Flush(); - } - - m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1; -} - -template void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2) - { - Flush(); - } - - m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2; -} - -void GSState::GIFRegHandlerTEXA(const GIFReg* 
RESTRICT r) -{ - if(r->TEXA != m_env.TEXA) - { - Flush(); - } - - m_env.TEXA = (GSVector4i)r->TEXA; -} - -void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r) -{ - if(r->FOGCOL != m_env.FOGCOL) - { - Flush(); - } - - m_env.FOGCOL = (GSVector4i)r->FOGCOL; -} - -void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r) -{ - m_texflush = true; -} - -template void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR) - { - Flush(); - } - - m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR; - - m_env.CTXT[i].UpdateScissor(); - - UpdateScissor(); -} - -template void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r) -{ - ASSERT(r->ALPHA.A != 3); - ASSERT(r->ALPHA.B != 3); - ASSERT(r->ALPHA.C != 3); - ASSERT(r->ALPHA.D != 3); - - if(PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA) - { - Flush(); - } - - m_env.CTXT[i].ALPHA = (GSVector4i)r->ALPHA; - - // A/B/C/D == 3? => 2 - - m_env.CTXT[i].ALPHA.u32[0] = ((~m_env.CTXT[i].ALPHA.u32[0] >> 1) | 0xAA) & m_env.CTXT[i].ALPHA.u32[0]; -} - -void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r) -{ - bool update = false; - - if(r->DIMX != m_env.DIMX) - { - Flush(); - - update = true; - } - - m_env.DIMX = (GSVector4i)r->DIMX; - - if(update) - { - m_env.UpdateDIMX(); - } -} - -void GSState::GIFRegHandlerDTHE(const GIFReg* RESTRICT r) -{ - if(r->DTHE != m_env.DTHE) - { - Flush(); - } - - m_env.DTHE = (GSVector4i)r->DTHE; -} - -void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r) -{ - if(r->COLCLAMP != m_env.COLCLAMP) - { - Flush(); - } - - m_env.COLCLAMP = (GSVector4i)r->COLCLAMP; -#ifdef DISABLE_COLCLAMP - m_env.COLCLAMP.CLAMP = 1; -#endif -} - -template void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST) - { - Flush(); - } - - m_env.CTXT[i].TEST = (GSVector4i)r->TEST; -#ifdef DISABLE_DATE - m_env.CTXT[i].TEST.DATE = 0; -#endif -} - -void GSState::GIFRegHandlerPABE(const GIFReg* 
RESTRICT r) -{ - if(r->PABE != m_env.PABE) - { - Flush(); - } - - m_env.PABE = (GSVector4i)r->PABE; -} - -template void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA) - { - Flush(); - } - - m_env.CTXT[i].FBA = (GSVector4i)r->FBA; -} - -template void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r) -{ - if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME) - { - Flush(); - } - - if((m_env.CTXT[i].FRAME.u32[0] ^ r->FRAME.u32[0]) & 0x3f3f01ff) // FBP FBW PSM - { - m_env.CTXT[i].offset.fb = m_mem.GetOffset(r->FRAME.Block(), r->FRAME.FBW, r->FRAME.PSM); - m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), r->FRAME.FBW, m_env.CTXT[i].ZBUF.PSM); - m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(r->FRAME, m_env.CTXT[i].ZBUF); - m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(r->FRAME, m_env.CTXT[i].ZBUF); - } - - m_env.CTXT[i].FRAME = (GSVector4i)r->FRAME; - -#ifdef DISABLE_BITMASKING - m_env.CTXT[i].FRAME.FBMSK = GSVector4i::store(GSVector4i::load((int)m_env.CTXT[i].FRAME.FBMSK).eq8(GSVector4i::xffffffff())); -#endif -} - -template void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r) -{ - GIFRegZBUF ZBUF = r->ZBUF; - - if(ZBUF.u32[0] == 0) - { - // during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid - // edit: breaks Grandia Xtreme and sounds like a bad idea generally. What was the intend? 
- // edit2: should be set only before any serious drawing happens, grandia extreme nulls out this register throughout the whole game, - // I already forgot what it fixed, that game never masked the zbuffer, but assumed it was set by default - //ZBUF.ZMSK = 1; - } - - ZBUF.PSM |= 0x30; - - if(ZBUF.PSM != PSM_PSMZ32 - && ZBUF.PSM != PSM_PSMZ24 - && ZBUF.PSM != PSM_PSMZ16 - && ZBUF.PSM != PSM_PSMZ16S) - { - ZBUF.PSM = PSM_PSMZ32; - } - - if(PRIM->CTXT == i && ZBUF != m_env.CTXT[i].ZBUF) - { - Flush(); - } - - if((m_env.CTXT[i].ZBUF.u32[0] ^ ZBUF.u32[0]) & 0x3f0001ff) // ZBP PSM - { - m_env.CTXT[i].offset.zb = m_mem.GetOffset(ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, ZBUF.PSM); - m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, ZBUF); - m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, ZBUF); - } - - m_env.CTXT[i].ZBUF = (GSVector4i)ZBUF; -} - -void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r) -{ - if(r->BITBLTBUF != m_env.BITBLTBUF) - { - FlushWrite(); - } - - m_env.BITBLTBUF = (GSVector4i)r->BITBLTBUF; - - if((m_env.BITBLTBUF.SBW & 1) && (m_env.BITBLTBUF.SPSM == PSM_PSMT8 || m_env.BITBLTBUF.SPSM == PSM_PSMT4)) - { - m_env.BITBLTBUF.SBW &= ~1; - } - - if((m_env.BITBLTBUF.DBW & 1) && (m_env.BITBLTBUF.DPSM == PSM_PSMT8 || m_env.BITBLTBUF.DPSM == PSM_PSMT4)) - { - m_env.BITBLTBUF.DBW &= ~1; // namcoXcapcom: 5, 11, refered to as 4, 10 in TEX0.TBW later - } -} - -void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r) -{ - if(r->TRXPOS != m_env.TRXPOS) - { - FlushWrite(); - } - - m_env.TRXPOS = (GSVector4i)r->TRXPOS; -} - -void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r) -{ - if(r->TRXREG != m_env.TRXREG) - { - FlushWrite(); - } - - m_env.TRXREG = (GSVector4i)r->TRXREG; -} - -void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r) -{ - Flush(); - - m_env.TRXDIR = (GSVector4i)r->TRXDIR; - - switch(m_env.TRXDIR.XDIR) - { - case 0: // host -> local - m_tr.Init(m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY); - break; 
- case 1: // local -> host - m_tr.Init(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY); - break; - case 2: // local -> local - Move(); - break; - case 3: - ASSERT(0); - break; - default: - __assume(0); - } -} - -void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r) -{ - ASSERT(m_env.TRXDIR.XDIR == 0); // host => local - - Write((uint8*)r, 8); // haunting ground -} - -void GSState::GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r) -{ - m_regs->SIGLBLID.SIGID = (m_regs->SIGLBLID.SIGID & ~r->SIGNAL.IDMSK) | (r->SIGNAL.ID & r->SIGNAL.IDMSK); - - if(m_regs->CSR.wSIGNAL) m_regs->CSR.rSIGNAL = 1; - if(!m_regs->IMR.SIGMSK && m_irq) m_irq(); -} - -void GSState::GIFRegHandlerFINISH(const GIFReg* RESTRICT r) -{ - if(m_regs->CSR.wFINISH) m_regs->CSR.rFINISH = 1; - if(!m_regs->IMR.FINISHMSK && m_irq) m_irq(); -} - -void GSState::GIFRegHandlerLABEL(const GIFReg* RESTRICT r) -{ - m_regs->SIGLBLID.LBLID = (m_regs->SIGLBLID.LBLID & ~r->LABEL.IDMSK) | (r->LABEL.ID & r->LABEL.IDMSK); -} - -// - -void GSState::Flush() -{ - FlushWrite(); - - FlushPrim(); -} - -void GSState::FlushWrite() -{ - int len = m_tr.end - m_tr.start; - - if(len <= 0) return; - - GSVector4i r; - - r.left = m_env.TRXPOS.DSAX; - r.top = m_env.TRXPOS.DSAY; - r.right = r.left + m_env.TRXREG.RRW; - r.bottom = r.top + m_env.TRXREG.RRH; - - InvalidateVideoMem(m_env.BITBLTBUF, r); - - //int y = m_tr.y; - - GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi; - - (m_mem.*wi)(m_tr.x, m_tr.y, &m_tr.buff[m_tr.start], len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); - - m_tr.start += len; - - m_perfmon.Put(GSPerfMon::Swizzle, len); - - /* - GSVector4i r; - - r.left = m_env.TRXPOS.DSAX; - r.top = y; - r.right = r.left + m_env.TRXREG.RRW; - r.bottom = std::min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? 
m_tr.y : m_tr.y + 1); - - InvalidateVideoMem(m_env.BITBLTBUF, r); - */ -/* - static int n = 0; - string s; - s = format("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp", - n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, - r.left, r.top, r.right, r.bottom); - m_mem.SaveBMP(s, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom); -*/ -} - -void GSState::FlushPrim() -{ - if(m_index.tail > 0) - { - GSVertex buff[2]; - - size_t head = m_vertex.head; - size_t tail = m_vertex.tail; - size_t next = m_vertex.next; - size_t unused = 0; - - if(tail > head) - { - switch(PRIM->PRIM) - { - case GS_POINTLIST: - ASSERT(0); - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - unused = tail - head; - memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused); - break; - case GS_TRIANGLEFAN: - buff[0] = m_vertex.buff[head]; unused = 1; - if(tail - 1 > head) {buff[1] = m_vertex.buff[tail - 1]; unused = 2;} - break; - case GS_INVALID: - break; - default: - __assume(0); - } - - ASSERT((int)unused < GSUtil::GetVertexCount(PRIM->PRIM)); - } - - if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3) - { - // FIXME: berserk fpsm = 27 (8H) - - m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); - - Draw(); - - m_perfmon.Put(GSPerfMon::Draw, 1); - m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM)); - } - - m_index.tail = 0; - - m_vertex.head = 0; - - if(unused > 0) - { - memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused); - - m_vertex.tail = unused; - m_vertex.next = next > head ? 
next - head : 0; - } - else - { - m_vertex.tail = 0; - m_vertex.next = 0; - } - } -} - -// - -void GSState::Write(const uint8* mem, int len) -{ - int w = m_env.TRXREG.RRW; - int h = m_env.TRXREG.RRH; - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]; - - // printf("Write len=%d DBP=%05x DBW=%d DPSM=%d DSAX=%d DSAY=%d RRW=%d RRH=%d\n", len, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, m_env.TRXREG.RRW, m_env.TRXREG.RRH); - - if(!m_tr.Update(w, h, psm.trbpp, len)) - { - return; - } - - GL_CACHE("Write! ... => 0x%x W:%d F:%d (DIR %d%d), dPos(%d %d) size(%d %d)", - m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, - m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, - m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h); - - if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) // TODO: hmmmm - { - FlushPrim(); - } - - if(m_tr.end == 0 && len >= m_tr.total) - { - // received all data in one piece, no need to buffer it - - // printf("%d >= %d\n", len, m_tr.total); - - GSVector4i r; - - r.left = m_env.TRXPOS.DSAX; - r.top = m_env.TRXPOS.DSAY; - r.right = r.left + m_env.TRXREG.RRW; - r.bottom = r.top + m_env.TRXREG.RRH; - - InvalidateVideoMem(m_env.BITBLTBUF, r); - - (m_mem.*psm.wi)(m_tr.x, m_tr.y, mem, m_tr.total, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); - - m_tr.start = m_tr.end = m_tr.total; - - m_perfmon.Put(GSPerfMon::Swizzle, len); - - /* - static int n = 0; - string s; - s = format("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp", - n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, - r.left, r.top, r.right, r.bottom); - m_mem.SaveBMP(s, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom); - */ - } - else - { - // printf("%d += %d (%d)\n", m_tr.end, len, m_tr.total); - - memcpy(&m_tr.buff[m_tr.end], mem, len); - - m_tr.end += len; - - if(m_tr.end >= m_tr.total) - { - 
FlushWrite(); - } - } - - m_mem.m_clut.Invalidate(); -} - -void GSState::InitReadFIFO(uint8* mem, int len) -{ - if(len <= 0) return; - - // Allow to keep compatibility with older PCSX2 - m_init_read_fifo_supported = true; - - int sx = m_env.TRXPOS.SSAX; - int sy = m_env.TRXPOS.SSAY; - int w = m_env.TRXREG.RRW; - int h = m_env.TRXREG.RRH; - - // printf("Read len=%d SBP=%05x SBW=%d SPSM=%d SSAX=%d SSAY=%d RRW=%d RRH=%d\n", len, (int)m_env.BITBLTBUF.SBP, (int)m_env.BITBLTBUF.SBW, (int)m_env.BITBLTBUF.SPSM, sx, sy, w, h); - - if(!m_tr.Update(w, h, GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp, len)) - { - return; - } - - if(m_tr.x == sx && m_tr.y == sy) - { - InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h)); - } -} - -void GSState::Read(uint8* mem, int len) -{ - if(len <= 0) return; - - int sx = m_env.TRXPOS.SSAX; - int sy = m_env.TRXPOS.SSAY; - int w = m_env.TRXREG.RRW; - int h = m_env.TRXREG.RRH; - - // printf("Read len=%d SBP=%05x SBW=%d SPSM=%d SSAX=%d SSAY=%d RRW=%d RRH=%d\n", len, (int)m_env.BITBLTBUF.SBP, (int)m_env.BITBLTBUF.SBW, (int)m_env.BITBLTBUF.SPSM, sx, sy, w, h); - - if(!m_tr.Update(w, h, GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp, len)) - { - return; - } - - if(!m_init_read_fifo_supported) - { - if(m_tr.x == sx && m_tr.y == sy) - { - InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h)); - } - } - - m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); -} - -void GSState::Move() -{ - // ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect - // guitar hero copies the far end of the board to do a similar blend too - - int sx = m_env.TRXPOS.SSAX; - int sy = m_env.TRXPOS.SSAY; - int dx = m_env.TRXPOS.DSAX; - int dy = m_env.TRXPOS.DSAY; - int w = m_env.TRXREG.RRW; - int h = m_env.TRXREG.RRH; - - GL_CACHE("Move! 
0x%x W:%d F:%d => 0x%x W:%d F:%d (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)", - m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, - m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, - m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, - sx, sy, dx, dy, w, h); - - InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h)); - InvalidateVideoMem(m_env.BITBLTBUF, GSVector4i(dx, dy, dx + w, dy + h)); - - int xinc = 1; - int yinc = 1; - - if(m_env.TRXPOS.DIRX) {sx += w - 1; dx += w - 1; xinc = -1;} - if(m_env.TRXPOS.DIRY) {sy += h - 1; dy += h - 1; yinc = -1;} -/* - printf("%05x %d %d => %05x %d %d (%d%d), %d %d %d %d %d %d\n", - m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, - m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, - m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, - sx, sy, dx, dy, w, h); -*/ -/* - GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; - GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp; - - for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) - for(int x = 0; x < w; x++, sx += xinc, dx += xinc) - (m_mem.*wp)(dx, dy, (m_mem.*rp)(sx, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW), m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); -*/ - - const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM]; - const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]; - - // TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format) - - GSOffset* RESTRICT spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM); - GSOffset* RESTRICT dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM); - - if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16) - { - int* RESTRICT scol = &spo->pixel.col[0][sx]; - int* RESTRICT dcol = &dpo->pixel.col[0][dx]; - - if(spsm.trbpp == 32) - { - if(xinc > 0) - { 
- for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]]; - uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]]; - - for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]]; - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]]; - uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]]; - - for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]]; - } - } - } - else if(spsm.trbpp == 24) - { - if(xinc > 0) - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]]; - uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]]; - - for(int x = 0; x < w; x++) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff); - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]]; - uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]]; - - for(int x = 0; x > -w; x--) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff); - } - } - } - else // if(spsm.trbpp == 16) - { - if(xinc > 0) - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]]; - uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]]; - - for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]]; - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]]; - uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]]; - - for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]]; - } - } - } - } - else if(m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8) - { - if(xinc > 0) - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]]; - uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* 
RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]]; - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]]; - uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]]; - } - } - } - else if(m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4) - { - if(xinc > 0) - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32 sbase = spo->pixel.row[sy]; - uint32 dbase = dpo->pixel.row[dy]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x < w; x++) m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x])); - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32 sbase = spo->pixel.row[sy]; - uint32 dbase = dpo->pixel.row[dy]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x > -w; x--) m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x])); - } - } - } - else - { - if(xinc > 0) - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32 sbase = spo->pixel.row[sy]; - uint32 dbase = dpo->pixel.row[dy]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x < w; x++) (m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x])); - } - } - else - { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc) - { - uint32 sbase = spo->pixel.row[sy]; - uint32 dbase = dpo->pixel.row[dy]; - - int* RESTRICT scol = &spo->pixel.col[sy & 7][sx]; - int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx]; - - for(int x = 0; x > -w; x--) (m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + 
scol[x])); - } - } - } -} - -void GSState::SoftReset(uint32 mask) -{ - if(mask & 1) - { - memset(&m_path[0], 0, sizeof(GIFPath)); - memset(&m_path[3], 0, sizeof(GIFPath)); - } - - if(mask & 2) memset(&m_path[1], 0, sizeof(GIFPath)); - if(mask & 4) memset(&m_path[2], 0, sizeof(GIFPath)); - - m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value - - m_q = 1.0f; -} - -void GSState::ReadFIFO(uint8* mem, int size) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - Flush(); - - size *= 16; - - Read(mem, size); - - if(m_dump) - { - m_dump.ReadFIFO(size); - } -} - -template void GSState::Transfer<0>(const uint8* mem, uint32 size); -template void GSState::Transfer<1>(const uint8* mem, uint32 size); -template void GSState::Transfer<2>(const uint8* mem, uint32 size); -template void GSState::Transfer<3>(const uint8* mem, uint32 size); - -template void GSState::Transfer(const uint8* mem, uint32 size) -{ - GSPerfMonAutoTimer pmat(&m_perfmon); - - const uint8* start = mem; - - GIFPath& path = m_path[index]; - - while(size > 0) - { - if(path.nloop == 0) - { - path.SetTag(mem); - - mem += sizeof(GIFTag); - size--; - - if(path.nloop > 0) // eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded." - { - m_q = 1.0f; - - // ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts - - if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED) - { - ApplyPRIM(path.tag.PRIM); - } - } - } - else - { - uint32 total; - - switch(path.tag.FLG) - { - case GIF_FLG_PACKED: - - // get to the start of the loop - - if(path.reg != 0) - { - do - { - (this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem); - - mem += sizeof(GIFPackedReg); - size--; - } - while(path.StepReg() && size > 0 && path.reg != 0); - } - - // all data available? 
usually is - - total = path.nloop * path.nreg; - - if(size >= total) - { - size -= total; - - switch(path.type) - { - case GIFPath::TYPE_UNKNOWN: - - { - uint32 reg = 0; - - do - { - (this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem); - - mem += sizeof(GIFPackedReg); - - reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg - } - while(--total > 0); - } - - break; - - case GIFPath::TYPE_ADONLY: // very common - - do - { - (this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR])(&((GIFPackedReg*)mem)->r); - - mem += sizeof(GIFPackedReg); - } - while(--total > 0); - - break; - - case GIFPath::TYPE_STQRGBAXYZF2: // majority of the vertices are formatted like this - - (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2])((GIFPackedReg*)mem, total); - - mem += total * sizeof(GIFPackedReg); - - break; - - case GIFPath::TYPE_STQRGBAXYZ2: - - (this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2])((GIFPackedReg*)mem, total); - - mem += total * sizeof(GIFPackedReg); - - break; - - default: - - __assume(0); - } - - path.nloop = 0; - } - else - { - do - { - (this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem); - - mem += sizeof(GIFPackedReg); - size--; - } - while(path.StepReg() && size > 0); - } - - break; - - case GIF_FLG_REGLIST: - - // TODO: do it similar to packed operation - - size *= 2; - - do - { - (this->*m_fpGIFRegHandlers[path.GetReg()])((GIFReg*)mem); - - mem += sizeof(GIFReg); - size--; - } - while(path.StepReg() && size > 0); - - if(size & 1) mem += sizeof(GIFReg); - - size /= 2; - - break; - - case GIF_FLG_IMAGE2: // hmmm // Fall through here fixes a crash in Wallace and Gromit Project Zoo - // and according to Pseudonym we shouldn't even land in this code. So hmm indeed. 
(rama) - - /*ASSERT(0); - - path.nloop = 0; - - break;*/ - - case GIF_FLG_IMAGE: - - { - int len = (int)min(size, path.nloop); - - //ASSERT(!(len&3)); - - switch(m_env.TRXDIR.XDIR) - { - case 0: - Write(mem, len * 16); - break; - case 1: - // This can't happen; downloads can not be started or performed as part of - // a GIFtag operation. They're an entirely separate process that can only be - // done through the ReverseFIFO transfer (aka ReadFIFO). --air - ASSERT(0); - //Read(mem, len * 16); - break; - case 2: - Move(); - break; - case 3: - ASSERT(0); - break; - default: - __assume(0); - } - - mem += len * 16; - path.nloop -= len; - size -= len; - } - - break; - - default: - __assume(0); - } - } - - if(index == 0) - { - if(path.tag.EOP && path.nloop == 0) - { - break; - } - } - } - - if(m_dump && mem > start) - { - m_dump.Transfer(index, start, mem - start); - } - - if(index == 0) - { - if(size == 0 && path.nloop > 0) - { - if(m_mt) - { - // Hackfix for BIOS, which sends an incomplete packet when it does an XGKICK without - // having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed to - // handle it more properly (ie, without looping infinitely). - - path.nloop = 0; - } - else - { - // Unused in 0.9.7 and above, but might as well keep this for now; allows GSdx - // to work with legacy editions of PCSX2. 
- - Transfer<0>(mem - 0x4000, 0x4000 / 16); - } - } - } -} - -template static void WriteState(uint8*& dst, T* src, size_t len = sizeof(T)) -{ - memcpy(dst, src, len); - dst += len; -} - -template static void ReadState(T* dst, uint8*& src, size_t len = sizeof(T)) -{ - memcpy(dst, src, len); - src += len; -} - -int GSState::Freeze(GSFreezeData* fd, bool sizeonly) -{ - if(sizeonly) - { - fd->size = m_sssize; - return 0; - } - - if(!fd->data || fd->size < m_sssize) - { - return -1; - } - - Flush(); - - uint8* data = fd->data; - - WriteState(data, &m_version); - WriteState(data, &m_env.PRIM); - WriteState(data, &m_env.PRMODE); - WriteState(data, &m_env.PRMODECONT); - WriteState(data, &m_env.TEXCLUT); - WriteState(data, &m_env.SCANMSK); - WriteState(data, &m_env.TEXA); - WriteState(data, &m_env.FOGCOL); - WriteState(data, &m_env.DIMX); - WriteState(data, &m_env.DTHE); - WriteState(data, &m_env.COLCLAMP); - WriteState(data, &m_env.PABE); - WriteState(data, &m_env.BITBLTBUF); - WriteState(data, &m_env.TRXDIR); - WriteState(data, &m_env.TRXPOS); - WriteState(data, &m_env.TRXREG); - WriteState(data, &m_env.TRXREG); // obsolete - - for(int i = 0; i < 2; i++) - { - WriteState(data, &m_env.CTXT[i].XYOFFSET); - WriteState(data, &m_env.CTXT[i].TEX0); - WriteState(data, &m_env.CTXT[i].TEX1); - WriteState(data, &m_env.CTXT[i].TEX2); - WriteState(data, &m_env.CTXT[i].CLAMP); - WriteState(data, &m_env.CTXT[i].MIPTBP1); - WriteState(data, &m_env.CTXT[i].MIPTBP2); - WriteState(data, &m_env.CTXT[i].SCISSOR); - WriteState(data, &m_env.CTXT[i].ALPHA); - WriteState(data, &m_env.CTXT[i].TEST); - WriteState(data, &m_env.CTXT[i].FBA); - WriteState(data, &m_env.CTXT[i].FRAME); - WriteState(data, &m_env.CTXT[i].ZBUF); - } - - WriteState(data, &m_v.RGBAQ); - WriteState(data, &m_v.ST); - WriteState(data, &m_v.UV); - WriteState(data, &m_v.FOG); - WriteState(data, &m_v.XYZ); - data += sizeof(GIFReg); // obsolite - WriteState(data, &m_tr.x); - WriteState(data, &m_tr.y); - WriteState(data, 
m_mem.m_vm8, m_mem.m_vmsize); - - for(size_t i = 0; i < countof(m_path); i++) - { - m_path[i].tag.NREG = m_path[i].nreg; - m_path[i].tag.NLOOP = m_path[i].nloop; - m_path[i].tag.REGS = 0; - - for(size_t j = 0; j < countof(m_path[i].regs.u8); j++) - { - m_path[i].tag.u32[2 + (j >> 3)] |= m_path[i].regs.u8[j] << ((j & 7) << 2); - } - - WriteState(data, &m_path[i].tag); - WriteState(data, &m_path[i].reg); - } - - WriteState(data, &m_q); - - return 0; -} - -int GSState::Defrost(const GSFreezeData* fd) -{ - if(!fd || !fd->data || fd->size == 0) - { - return -1; - } - - if(fd->size < m_sssize) - { - return -1; - } - - uint8* data = fd->data; - - int version; - - ReadState(&version, data); - - if(version > m_version) - { - printf("GSdx: Savestate version is incompatible. Load aborted.\n" ); - - return -1; - } - - Flush(); - - Reset(); - - ReadState(&m_env.PRIM, data); - ReadState(&m_env.PRMODE, data); - ReadState(&m_env.PRMODECONT, data); - ReadState(&m_env.TEXCLUT, data); - ReadState(&m_env.SCANMSK, data); - ReadState(&m_env.TEXA, data); - ReadState(&m_env.FOGCOL, data); - ReadState(&m_env.DIMX, data); - ReadState(&m_env.DTHE, data); - ReadState(&m_env.COLCLAMP, data); - ReadState(&m_env.PABE, data); - ReadState(&m_env.BITBLTBUF, data); - ReadState(&m_env.TRXDIR, data); - ReadState(&m_env.TRXPOS, data); - ReadState(&m_env.TRXREG, data); - ReadState(&m_env.TRXREG, data); // obsolete - - for(int i = 0; i < 2; i++) - { - ReadState(&m_env.CTXT[i].XYOFFSET, data); - ReadState(&m_env.CTXT[i].TEX0, data); - ReadState(&m_env.CTXT[i].TEX1, data); - ReadState(&m_env.CTXT[i].TEX2, data); - ReadState(&m_env.CTXT[i].CLAMP, data); - ReadState(&m_env.CTXT[i].MIPTBP1, data); - ReadState(&m_env.CTXT[i].MIPTBP2, data); - ReadState(&m_env.CTXT[i].SCISSOR, data); - ReadState(&m_env.CTXT[i].ALPHA, data); - ReadState(&m_env.CTXT[i].TEST, data); - ReadState(&m_env.CTXT[i].FBA, data); - ReadState(&m_env.CTXT[i].FRAME, data); - ReadState(&m_env.CTXT[i].ZBUF, data); - - m_env.CTXT[i].XYOFFSET.OFX 
&= 0xffff; - m_env.CTXT[i].XYOFFSET.OFY &= 0xffff; - - if(version <= 4) - { - data += sizeof(uint32) * 7; // skip - } - } - - ReadState(&m_v.RGBAQ, data); - ReadState(&m_v.ST, data); - ReadState(&m_v.UV, data); - ReadState(&m_v.FOG, data); - ReadState(&m_v.XYZ, data); - data += sizeof(GIFReg); // obsolite - ReadState(&m_tr.x, data); - ReadState(&m_tr.y, data); - ReadState(m_mem.m_vm8, data, m_mem.m_vmsize); - - m_tr.total = 0; // TODO: restore transfer state - - for(size_t i = 0; i < countof(m_path); i++) - { - ReadState(&m_path[i].tag, data); - ReadState(&m_path[i].reg, data); - - m_path[i].SetTag(&m_path[i].tag); // expand regs - } - - ReadState(&m_q, data); - - PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; - - UpdateContext(); - - UpdateVertexKick(); - - m_env.UpdateDIMX(); - - for(size_t i = 0; i < 2; i++) - { - m_env.CTXT[i].UpdateScissor(); - - m_env.CTXT[i].offset.fb = m_mem.GetOffset(m_env.CTXT[i].FRAME.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].FRAME.PSM); - m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].ZBUF.PSM); - m_env.CTXT[i].offset.tex = m_mem.GetOffset(m_env.CTXT[i].TEX0.TBP0, m_env.CTXT[i].TEX0.TBW, m_env.CTXT[i].TEX0.PSM); - m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF); - m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF); - } - - UpdateScissor(); - -m_perfmon.SetFrame(5000); - - return 0; -} - -void GSState::SetGameCRC(uint32 crc, int options) -{ - m_crc = crc; - m_options = options; - m_game = CRC::Lookup(m_crc_hack_level ? 
crc : 0); -} - -// - -void GSState::UpdateContext() -{ - m_context = &m_env.CTXT[PRIM->CTXT]; - - UpdateScissor(); -} - -void GSState::UpdateScissor() -{ - m_scissor = m_context->scissor.ex; - m_ofxy = m_context->scissor.ofxy; -} - -void GSState::UpdateVertexKick() -{ - if(m_frameskip) return; - - uint32 prim = PRIM->PRIM; - - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0]; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = m_fpGIFPackedRegHandlerXYZ[prim][1]; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = m_fpGIFPackedRegHandlerXYZ[prim][2]; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = m_fpGIFPackedRegHandlerXYZ[prim][3]; - - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = m_fpGIFRegHandlerXYZ[prim][0]; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = m_fpGIFRegHandlerXYZ[prim][1]; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = m_fpGIFRegHandlerXYZ[prim][2]; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3]; - - m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim]; - m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim]; -} - -void GSState::GrowVertexBuffer() -{ - int maxcount = std::max(m_vertex.maxcount * 3 / 2, 10000); - - GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32); - uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 32); // worst case is slightly less than vertex number * 3 - - if(vertex == NULL || index == NULL) - { - printf("GSdx: failed to allocate %d bytes for verticles and %d for indices.\n", (int)sizeof(GSVertex) * maxcount, (int)sizeof(uint32) * maxcount * 3); - throw GSDXError(); - } - - if(m_vertex.buff != NULL) - { - memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail); - - _aligned_free(m_vertex.buff); - } - - if(m_index.buff != NULL) - { - memcpy(index, m_index.buff, sizeof(uint32) * m_index.tail); - - _aligned_free(m_index.buff); - } - - m_vertex.buff = vertex; - m_vertex.maxcount = 
maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it - m_index.buff = index; -} - -template -__forceinline void GSState::VertexKick(uint32 skip) -{ - ASSERT(m_vertex.tail < m_vertex.maxcount + 3); - - size_t head = m_vertex.head; - size_t tail = m_vertex.tail; - size_t next = m_vertex.next; - size_t xy_tail = m_vertex.xy_tail; - - // callers should write XYZUVF to m_v.m[1] in one piece to have this load store-forwarded, either by the cpu or the compiler when this function is inlined - - GSVector4i v0(m_v.m[0]); - GSVector4i v1(m_v.m[1]); - - GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail]; - - tailptr[0] = v0; - tailptr[1] = v1; - - GSVector4i xy = v1.xxxx().u16to32().sub32(m_ofxy); - - #if _M_SSE >= 0x401 - GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend16<0xf0>(xy.sra32(4)).ps32()); - #else - GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl64(xy.sra32(4).zwzw()).ps32()); - #endif - - m_vertex.tail = ++tail; - m_vertex.xy_tail = ++xy_tail; - - size_t n = 0; - - switch(prim) - { - case GS_POINTLIST: n = 1; break; - case GS_LINELIST: n = 2; break; - case GS_LINESTRIP: n = 2; break; - case GS_TRIANGLELIST: n = 3; break; - case GS_TRIANGLESTRIP: n = 3; break; - case GS_TRIANGLEFAN: n = 3; break; - case GS_SPRITE: n = 2; break; - case GS_INVALID: n = 1; break; - } - - size_t m = tail - head; - - if(m < n) - { - return; - } - - if(skip == 0 && (prim != GS_TRIANGLEFAN || m <= 4)) // m_vertex.xy only knows about the last 4 vertices, head could be far behind for fan - { - GSVector4i v0, v1, v2, v3, pmin, pmax; - - v0 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 1) & 3]); // T-3 - v1 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 2) & 3]); // T-2 - v2 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 3) & 3]); // T-1 - v3 = GSVector4i::loadl(&m_vertex.xy[(xy_tail - m) & 3]); // H - - GSVector4 cross; - - switch(prim) - { - case GS_POINTLIST: - pmin = v2; - pmax = v2; - break; - case GS_LINELIST: - case 
GS_LINESTRIP: - case GS_SPRITE: - pmin = v2.min_i16(v1); - pmax = v2.max_i16(v1); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - pmin = v2.min_i16(v1.min_i16(v0)); - pmax = v2.max_i16(v1.max_i16(v0)); - break; - case GS_TRIANGLEFAN: - pmin = v2.min_i16(v1.min_i16(v3)); - pmax = v2.max_i16(v1.max_i16(v3)); - break; - default: - break; - } - - GSVector4i test = pmax.lt16(m_scissor) | pmin.gt16(m_scissor.zwzwl()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= m_nativeres ? pmin.eq16(pmax).zwzwl() : pmin.eq16(pmax); - break; - default: - break; - } - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - // TODO: any way to do a 16-bit integer cross product? - // cross product is zero most of the time because either of the vertices are the same - /* - cross = GSVector4(v2.xyxyl().i16to32().sub32(v0.upl32(v1).i16to32())); // x20, y20, x21, y21 - cross = cross * cross.wzwz(); // x20 * y21, y20 * x21 - test |= GSVector4i::cast(cross == cross.yxwz()); - */ - test = (test | v0 == v1) | (v1 == v2 | v0 == v2); - break; - case GS_TRIANGLEFAN: - /* - cross = GSVector4(v2.xyxyl().i16to32().sub32(v3.upl32(v1).i16to32())); // x23, y23, x21, y21 - cross = cross * cross.wzwz(); // x23 * y21, y23 * x21 - test |= GSVector4i::cast(cross == cross.yxwz()); - */ - test = (test | v3 == v1) | (v1 == v2 | v3 == v2); - break; - default: - break; - } - - skip |= test.mask() & 15; - } - - if(skip != 0) - { - switch(prim) - { - case GS_POINTLIST: - case GS_LINELIST: - case GS_TRIANGLELIST: - case GS_SPRITE: - case GS_INVALID: - m_vertex.tail = head; // no need to check or grow the buffer length - break; - case GS_LINESTRIP: - case GS_TRIANGLESTRIP: - m_vertex.head = head + 1; - // fall through - case GS_TRIANGLEFAN: - if(tail >= m_vertex.maxcount) GrowVertexBuffer(); // in case too many vertices were skipped - break; - default: - __assume(0); - } - - return; - } - - if(tail >= 
m_vertex.maxcount) GrowVertexBuffer(); - - uint32* RESTRICT buff = &m_index.buff[m_index.tail]; - - switch(prim) - { - case GS_POINTLIST: - buff[0] = head + 0; - m_vertex.head = head + 1; - m_vertex.next = head + 1; - m_index.tail += 1; - break; - case GS_LINELIST: - buff[0] = head + 0; - buff[1] = head + 1; - m_vertex.head = head + 2; - m_vertex.next = head + 2; - m_index.tail += 2; - break; - case GS_LINESTRIP: - if(next < head) - { - m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; - m_vertex.buff[next + 1] = m_vertex.buff[head + 1]; - head = next; - m_vertex.tail = next + 2; - } - buff[0] = head + 0; - buff[1] = head + 1; - m_vertex.head = head + 1; - m_vertex.next = head + 2; - m_index.tail += 2; - break; - case GS_TRIANGLELIST: - buff[0] = head + 0; - buff[1] = head + 1; - buff[2] = head + 2; - m_vertex.head = head + 3; - m_vertex.next = head + 3; - m_index.tail += 3; - break; - case GS_TRIANGLESTRIP: - if(next < head) - { - m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; - m_vertex.buff[next + 1] = m_vertex.buff[head + 1]; - m_vertex.buff[next + 2] = m_vertex.buff[head + 2]; - head = next; - m_vertex.tail = next + 3; - } - buff[0] = head + 0; - buff[1] = head + 1; - buff[2] = head + 2; - m_vertex.head = head + 1; - m_vertex.next = head + 3; - m_index.tail += 3; - break; - case GS_TRIANGLEFAN: - // TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare) - buff[0] = head + 0; - buff[1] = tail - 2; - buff[2] = tail - 1; - m_vertex.next = tail; - m_index.tail += 3; - break; - case GS_SPRITE: - buff[0] = head + 0; - buff[1] = head + 1; - m_vertex.head = head + 2; - m_vertex.next = head + 2; - m_index.tail += 2; - break; - case GS_INVALID: - m_vertex.tail = head; - break; - default: - __assume(0); - } -} - -void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear) -{ - // TODO: some of the +1s can be removed if linear == false - - int tw = TEX0.TW; - int th = TEX0.TH; 
- - int w = 1 << tw; - int h = 1 << th; - - GSVector4i tr(0, 0, w, h); - - int wms = CLAMP.WMS; - int wmt = CLAMP.WMT; - - int minu = (int)CLAMP.MINU; - int minv = (int)CLAMP.MINV; - int maxu = (int)CLAMP.MAXU; - int maxv = (int)CLAMP.MAXV; - - GSVector4i vr = tr; - - switch(wms) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.x < minu) vr.x = minu; - if(vr.z > maxu + 1) vr.z = maxu + 1; - break; - case CLAMP_REGION_REPEAT: - vr.x = maxu; - vr.z = vr.x + (minu + 1); - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.y < minv) vr.y = minv; - if(vr.w > maxv + 1) vr.w = maxv + 1; - break; - case CLAMP_REGION_REPEAT: - vr.y = maxv; - vr.w = vr.y + (minv + 1); - break; - default: - __assume(0); - } - - if(wms != CLAMP_REGION_REPEAT || wmt != CLAMP_REGION_REPEAT) - { - GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t); - - if(linear) - { - st += GSVector4(-0.5f, 0.5f).xxyy(); - } - - GSVector4i uv = GSVector4i(st.floor()); - - GSVector4i u, v; - - int mask = 0; - - // See commented code below for the meaning of mask - - if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT) - { - u = uv & GSVector4i::xffffffff().srl32(32 - tw); - v = uv & GSVector4i::xffffffff().srl32(32 - th); - - GSVector4i uu = uv.sra32(tw); - GSVector4i vv = uv.sra32(th); - - mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); - } - - uv = uv.rintersect(tr); - - switch(wms) - { - case CLAMP_REPEAT: - // This commented code cannot be used directly because it needs uv before the intersection - /*if (uv_.x >> tw == uv_.z >> tw) - { - vr.x = max(vr.x, (uv_.x & ((1 << tw) - 1))); - vr.z = min(vr.z, (uv_.z & ((1 << tw) - 1)) + 1); - }*/ - if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.x > uv.z) vr.z = vr.x + 1; - else if(vr.z < uv.x) vr.x = vr.z - 1; - else - { - if(vr.x < 
uv.x) vr.x = uv.x; - if(vr.z > uv.z + 1) vr.z = uv.z + 1; - } - break; - case CLAMP_REGION_REPEAT: - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - /*if (uv_.y >> th == uv_.w >> th) - { - vr.y = max(vr.y, (uv_.y & ((1 << th) - 1))); - vr.w = min(vr.w, (uv_.w & ((1 << th) - 1)) + 1); - }*/ - if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.y > uv.w) vr.w = vr.y + 1; - else if(vr.w < uv.y) vr.y = vr.w - 1; - else - { - if(vr.y < uv.y) vr.y = uv.y; - if(vr.w > uv.w + 1) vr.w = uv.w + 1; - } - break; - case CLAMP_REGION_REPEAT: - break; - default: - __assume(0); - } - } - - vr = vr.rintersect(tr); - - // This really shouldn't happen now except with the clamping region set entirely outside the texture, - // special handling should be written for that case. - - if(vr.rempty()) - { - // NOTE: this can happen when texcoords are all outside the texture or clamping area is zero, but we can't - // let the texture cache update nothing, the sampler will still need a single texel from the border somewhere - // examples: - // - THPS (no visible problems) - // - NFSMW (strange rectangles on screen, might be unrelated) - // - Lupin 3rd (huge problems, textures sizes seem to be randomly specified) - - vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr); - } - - r = vr; -} - -void GSState::GetAlphaMinMax() -{ - if(m_vt.m_alpha.valid) - { - return; - } - - const GSDrawingEnvironment& env = m_env; - const GSDrawingContext* context = m_context; - - GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww(); - - if(PRIM->TME && context->TEX0.TCC) - { - switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt) - { - case 0: - a.y = 0; - a.w = 0xff; - break; - case 1: - a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0; - a.w = env.TEXA.TA0; - break; - case 2: - a.y = env.TEXA.AEM ? 
0 : min(env.TEXA.TA0, env.TEXA.TA1); - a.w = max(env.TEXA.TA0, env.TEXA.TA1); - break; - case 3: - m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); - break; - default: - __assume(0); - } - - switch(context->TEX0.TFX) - { - case TFX_MODULATE: - a.x = (a.x * a.y) >> 7; - a.z = (a.z * a.w) >> 7; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_DECAL: - a.x = a.y; - a.z = a.w; - break; - case TFX_HIGHLIGHT: - a.x = a.x + a.y; - a.z = a.z + a.w; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_HIGHLIGHT2: - a.x = a.y; - a.z = a.w; - break; - default: - __assume(0); - } - } - - m_vt.m_alpha.min = a.x; - m_vt.m_alpha.max = a.z; - m_vt.m_alpha.valid = true; -} - -bool GSState::TryAlphaTest(uint32& fm, uint32& zm) -{ - const GSDrawingContext* context = m_context; - - bool pass = true; - - if(context->TEST.ATST == ATST_NEVER) - { - pass = false; - } - else if(context->TEST.ATST != ATST_ALWAYS) - { - GetAlphaMinMax(); - - int amin = m_vt.m_alpha.min; - int amax = m_vt.m_alpha.max; - - int aref = context->TEST.AREF; - - switch(context->TEST.ATST) - { - case ATST_NEVER: - pass = false; - break; - case ATST_ALWAYS: - pass = true; - break; - case ATST_LESS: - if(amax < aref) pass = true; - else if(amin >= aref) pass = false; - else return false; - break; - case ATST_LEQUAL: - if(amax <= aref) pass = true; - else if(amin > aref) pass = false; - else return false; - break; - case ATST_EQUAL: - if(amin == aref && amax == aref) pass = true; - else if(amin > aref || amax < aref) pass = false; - else return false; - break; - case ATST_GEQUAL: - if(amin >= aref) pass = true; - else if(amax < aref) pass = false; - else return false; - break; - case ATST_GREATER: - if(amin > aref) pass = true; - else if(amax <= aref) pass = false; - else return false; - break; - case ATST_NOTEQUAL: - if(amin == aref && amax == aref) pass = false; - else if(amin > aref || amax < aref) pass = true; - else return false; - break; - default: - __assume(0); - } 
- } - - if(!pass) - { - switch(context->TEST.AFAIL) - { - case AFAIL_KEEP: fm = zm = 0xffffffff; break; - case AFAIL_FB_ONLY: zm = 0xffffffff; break; - case AFAIL_ZB_ONLY: fm = 0xffffffff; break; - case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; - default: __assume(0); - } - } - - return true; -} - -bool GSState::IsOpaque() -{ - if(PRIM->AA1) - { - return false; - } - - if(!PRIM->ABE) - { - return true; - } - - const GSDrawingContext* context = m_context; - - int amin = 0, amax = 0xff; - - if(context->ALPHA.A != context->ALPHA.B) - { - if(context->ALPHA.C == 0) - { - GetAlphaMinMax(); - - amin = m_vt.m_alpha.min; - amax = m_vt.m_alpha.max; - } - else if(context->ALPHA.C == 1) - { - if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24) - { - amin = amax = 0x80; - } - } - else if(context->ALPHA.C == 2) - { - amin = amax = context->ALPHA.FIX; - } - } - - return context->ALPHA.IsOpaque(amin, amax); -} - -bool GSState::IsMipMapActive() -{ - return m_mipmap && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0; -} - -// GSTransferBuffer - -GSState::GSTransferBuffer::GSTransferBuffer() -{ - x = y = 0; - overflow = false; - start = end = total = 0; - buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); -} - -GSState::GSTransferBuffer::~GSTransferBuffer() -{ - _aligned_free(buff); -} - -void GSState::GSTransferBuffer::Init(int tx, int ty) -{ - x = tx; - y = ty; - total = 0; -} - -bool GSState::GSTransferBuffer::Update(int tw, int th, int bpp, int& len) -{ - if(total == 0) - { - start = end = 0; - total = std::min((tw * bpp >> 3) * th, 1024 * 1024 * 4); - overflow = false; - } - - int remaining = total - end; - - if(len > remaining) - { - if(!overflow) - { - overflow = true; - - // printf("GS transfer overflow\n"); - } - - len = remaining; - } - - return len > 0; -} - -// hacks -#define Aggresive (s_crc_hack_level > 3) -#define Dx_only (s_crc_hack_level > 2) - -struct GSFrameInfo -{ - 
uint32 FBP; - uint32 FPSM; - uint32 FBMSK; - uint32 TBP0; - uint32 TPSM; - uint32 TZTST; - bool TME; -}; - -typedef bool (*GetSkipCount)(const GSFrameInfo& fi, int& skip); -CRC::Region g_crc_region = CRC::NoRegion; - -bool GSC_Okami(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x00e00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT32) - { - skip = 1000; - } - } - else - { - if(fi.TME && fi.FBP == 0x00e00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03800 && fi.TPSM == PSM_PSMT4) - { - skip = 0; - } - } - - return true; -} - -bool GSC_MetalGearSolid3(const GSFrameInfo& fi, int& skip) -{ - // Game requires sub RT support (texture cache limitation) - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x02000 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT24) - { - skip = 1000; // 76, 79 - } - else if(fi.TME && fi.FBP == 0x02800 && fi.FPSM == PSM_PSMCT24 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT32) - { - skip = 1000; // 69 - } - } - else - { - if(!fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT32) - { - skip = 0; - } - else if(!fi.TME && fi.FBP == fi.TBP0 && fi.TBP0 == 0x2000 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMCT24) - { - if(g_crc_region == CRC::US || g_crc_region == CRC::JP || g_crc_region == CRC::KO) - { - skip = 119; //ntsc - } - else - { - skip = 136; //pal - } - } - } - - return true; -} - -bool GSC_DBZBT2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && /*fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT16 &&*/ (fi.TBP0 == 0x01c00 || fi.TBP0 == 0x02000) && fi.TPSM == PSM_PSMZ16) - { - if (Dx_only) // Feel like texture shuffle but not sure - skip = 26; //27 - } - else if(!fi.TME && (fi.FBP == 0x02a00 || fi.FBP == 0x03000) && fi.FPSM == PSM_PSMCT16) - { - skip = 10; - } - } - - return true; -} - -bool GSC_DBZBT3(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - 
if(fi.TME && fi.FBP == 0x01c00 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x00e00 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMT8H) - { - //not needed anymore? - //skip = 24; // blur - } - else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8H) - { - if (Dx_only) { // Ought to be fine with blending accuracy (fbmask?) - if(fi.FBMSK == 0x00000) - { - skip = 28; // outline - } - if(fi.FBMSK == 0x00FFFFFF) - { - skip = 1; - } - } - } - else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16) - { - // Texture shuffling must work on openGL - if (Dx_only) - skip = 5; - } - else if(fi.TME && fi.FPSM == fi.TPSM && fi.TBP0 == 0x03f00 && fi.TPSM == PSM_PSMCT32) - { - if (fi.FBP == 0x03400) - { - skip = 1; //PAL - } - if(fi.FBP == 0x02e00) - { - skip = 3; //NTSC - } - } - } - - return true; -} - -bool GSC_SFEX3(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00f00 && fi.TPSM == PSM_PSMCT16) - { - skip = 2; // blur - } - } - - return true; -} - -bool GSC_Bully(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01180) && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.FPSM == fi.TPSM) - { - return false; // allowed - } - - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01180) && fi.FPSM == PSM_PSMCT16S && fi.TBP0 == 0x02300 && fi.TPSM == PSM_PSMZ16S) - { - skip = 6; - } - } - else - { - if(!fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01180) && fi.FPSM == PSM_PSMCT32) - { - skip = 0; - } - } - - return true; -} - -bool GSC_BullyCC(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01180) && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.FBP == fi.TBP0 
&& fi.FPSM == PSM_PSMCT32 && fi.FPSM == fi.TPSM) - { - return false; // allowed - } - - if(!fi.TME && fi.FBP == 0x02800 && fi.FPSM == PSM_PSMCT24) - { - skip = 9; - } - } - - return true; -} - -bool GSC_SoTC(const GSFrameInfo& fi, int& skip) -{ - // Not needed anymore? What did it fix anyway? (rama) - if(skip == 0) - { - if(Aggresive && fi.TME /*&& fi.FBP == 0x03d80*/ && fi.FPSM == 0 && fi.TBP0 == 0x03fc0 && fi.TPSM == 1) - { - skip = 48; //removes sky bloom - } - /* - if(fi.TME && fi.FBP == 0x02b80 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x01e80 && fi.TPSM == PSM_PSMCT24) - { - skip = 9; - } - else if(fi.TME && fi.FBP == 0x01c00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03800 && fi.TPSM == PSM_PSMCT32) - { - skip = 8; - } - else if(fi.TME && fi.FBP == 0x01e80 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03880 && fi.TPSM == PSM_PSMCT32) - { - skip = 8; - }*/ - } - - - - - - return true; -} - -bool GSC_OnePieceGrandAdventure(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x02d00 && fi.FPSM == PSM_PSMCT16 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x00e00 || fi.TBP0 == 0x00f00) && fi.TPSM == PSM_PSMCT16) - { - skip = 4; - } - } - - return true; -} - -bool GSC_OnePieceGrandBattle(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x02d00 && fi.FPSM == PSM_PSMCT16 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x00f00) && fi.TPSM == PSM_PSMCT16) - { - skip = 4; - } - } - - return true; -} - -bool GSC_ICO(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03d00 && fi.TPSM == PSM_PSMCT32) - { - skip = 3; - } - else if(fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x02800 && fi.TPSM == PSM_PSMT8H) - { - skip = 1; - } - else if( Aggresive && fi.TME && fi.FBP == 0x0800 && (fi.TBP0 == 0x2800 || fi.TBP0 ==0x2c00) && fi.TPSM ==0 && fi.FBMSK == 0) - { - skip = 1; - } - } - else - { - if(fi.TME && fi.TBP0 == 
0x00800 && fi.TPSM == PSM_PSMCT32) - { - skip = 0; - } - } - - return true; -} - -bool GSC_GT4(const GSFrameInfo& fi, int& skip) -{ - // Game requires to extract source from RT (block boundary) (texture cache limitation) - if(skip == 0) - { - if(fi.TME && fi.FBP >= 0x02f00 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180 /*|| fi.TBP0 == 0x01a40*/) && fi.TPSM == PSM_PSMT8) //TBP0 0x1a40 progressive - { - skip = 770; //ntsc, progressive 1540 - } - if(g_crc_region == CRC::EU && fi.TME && fi.FBP >= 0x03400 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01400 ) && fi.TPSM == PSM_PSMT8) - { - skip = 880; //pal - } - else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01400) && fi.FPSM == PSM_PSMCT24 && fi.TBP0 >= 0x03420 && fi.TPSM == PSM_PSMT8) - { - // TODO: removes gfx from where it is not supposed to (garage) - // skip = 58; - } - } - - return true; -} - -bool GSC_GT3(const GSFrameInfo& fi, int& skip) -{ - // Same issue as GSC_GT4 ??? - if(skip == 0) - { - if(fi.TME && fi.FBP >= 0x02de0 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.TPSM == PSM_PSMT8) - { - skip = 770; - } - } - - return true; -} - -bool GSC_GTConcept(const GSFrameInfo& fi, int& skip) -{ - // Same issue as GSC_GT4 ??? 
- if(skip == 0) - { - if(fi.TME && fi.FBP >= 0x03420 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01400) && fi.TPSM == PSM_PSMT8) - { - skip = 880; - } - } - - return true; -} - -bool GSC_WildArms4(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x03100 && fi.FPSM == PSM_PSMZ32 && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMZ32) - { - skip = 100; - } - } - else - { - if(fi.TME && fi.FBP == 0x00e00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x02a00 && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_WildArms5(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x03100 && fi.FPSM == PSM_PSMZ32 && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMZ32) - { - skip = 100; - } - } - else - { - if(fi.TME && fi.FBP == 0x00e00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x02a00 && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_Manhunt2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x03c20 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x01400 && fi.TPSM == PSM_PSMT8) - { - skip = 640; - } - } - - return true; -} - -bool GSC_CrashBandicootWoC(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x00a00) && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x008c0 || fi.TBP0 == 0x00a00) && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.FPSM == fi.TPSM) - { - return false; // allowed - } - - if(fi.TME && (fi.FBP == 0x01e40 || fi.FBP == 0x02200) && fi.FPSM == PSM_PSMZ24 && (fi.TBP0 == 0x01180 || fi.TBP0 == 0x01400) && fi.TPSM == PSM_PSMZ24) - { - skip = 42; - } - } - else - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x00a00) && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03c00 && fi.TPSM == PSM_PSMCT32) - { - skip = 0; - } - else if(!fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x00a00)) - { - skip = 0; - } - } - - return true; 
-} - -bool GSC_ResidentEvil4(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x03100 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMZ24) - { - skip = 176; - } - else if(fi.TME && fi.FBP ==0x03100 && (fi.TBP0==0x2a00 ||fi.TBP0==0x3480) && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SacredBlaze(const GSFrameInfo& fi, int& skip) -{ - //Fix Sacred Blaze rendering glitches - if(skip == 0) - { - if(fi.TME && (fi.FBP==0x0000 || fi.FBP==0x0e00) && (fi.TBP0==0x2880 || fi.TBP0==0x2a80 ) && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0) - { - skip = 1; - } - } - return true; -} - -template -bool GSC_SMTNocturneDDS(const GSFrameInfo& fi, int& skip) -{ - // stop the motion blur on the main character and - // smudge filter from being drawn on USA versions of - // Nocturne, Digital Devil Saga 1 and Digital Devil Saga 2 - - if(Aggresive && g_crc_region == CRC::US && skip == 0 && fi.TBP0 == 0xE00 && fi.TME) - { - // Note: it will crash if the core doesn't allocate the EE mem in 0x2000_0000 (unlikely but possible) - // Aggresive hacks are evil anyway - - // Nocturne: - // -0x5900($gp), ref at 0x100740 - const int state = *(int*)(state_addr); - if (state == 23 || state == 24 || state == 25) - skip = 1; - } - return true; -} - -bool GSC_Spartan(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(g_crc_region == CRC::EU &&fi.TME && fi.FBP == 0x02000 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT32) - { - skip = 107; - } - if(g_crc_region == CRC::JP && fi.TME && fi.FBP == 0x02180 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x2180 && fi.TPSM == PSM_PSMCT32) - { - skip = 3; - } - else - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - 
(GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - } - - return true; -} - -bool GSC_AceCombat4(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x02a00 && fi.FPSM == PSM_PSMZ24 && fi.TBP0 == 0x01600 && fi.TPSM == PSM_PSMZ24) - { - skip = 71; // clouds (z, 16-bit) - } - else if(fi.TME && fi.FBP == 0x02900 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT24) - { - skip = 28; // blur - } - } - - return true; -} - -bool GSC_Tekken5(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x02d60 || fi.FBP == 0x02d80 || fi.FBP == 0x02ea0 || fi.FBP == 0x03620) && fi.FPSM == fi.TPSM && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT32) - { - skip = 95; - } - else if(fi.TME && (fi.FBP == 0x02bc0 || fi.FBP == 0x02be0 || fi.FBP == 0x02d00) && fi.FPSM == fi.TPSM && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT32) - { - skip = 2; - } - else if(fi.TME) - { - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 24; - } - } - } - - return true; -} - -bool GSC_IkkiTousen(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x00a80 && fi.FPSM == PSM_PSMZ24 && fi.TBP0 == 0x01180 && fi.TPSM == PSM_PSMZ24) - { - skip = 1000; // shadow (result is broken without depth copy, also includes 16 bit) - } - else if(fi.TME && fi.FBP == 0x00700 && fi.FPSM == PSM_PSMZ24 && fi.TBP0 == 0x01180 && fi.TPSM == PSM_PSMZ24) - { - skip = 11; // blur - } - } - else if(skip > 7) - { - if(fi.TME && fi.FBP == 0x00700 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00700 && fi.TPSM == PSM_PSMCT16) - { - skip = 7; // the last steps of shadow drawing - } - } - - return true; -} - -bool GSC_GodOfWar(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00000 && 
fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 1000; - } - else if(fi.TME && fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0xff000000) - { - skip = 1; // blur - } - else if(fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8 && ((fi.TZTST == 2 && fi.FBMSK == 0x00FFFFFF) || (fi.TZTST == 1 && fi.FBMSK == 0x00FFFFFF) || (fi.TZTST == 3 && fi.FBMSK == 0xFF000000))) - { - skip = 1; // wall of fog - } - else if (fi.TME && (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S)) - { - // Equivalent to the UserHacks_AutoSkipDrawDepth hack but enabled by default - // http://forums.pcsx2.net/Thread-God-of-War-Red-line-rendering-explained - skip = 1; - } - } - else - { - if(fi.TME && fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT16) - { - skip = 3; - } - } - - return true; -} - -bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME) - { - if( fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc - || fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal - { - skip = 1000; // shadows - } - if((fi.FBP == 0x00100 || fi.FBP == 0x02100) && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 & 0x03000) == 0x03000 - && (fi.TPSM == PSM_PSMT8 || fi.TPSM == PSM_PSMT4) - && ((fi.TZTST == 2 && fi.FBMSK == 0x00FFFFFF) || (fi.TZTST == 1 && fi.FBMSK == 0x00FFFFFF) || (fi.TZTST == 3 && fi.FBMSK == 0xFF000000))) - { - skip = 1; // wall of fog - } - else if(Aggresive && fi.TPSM == PSM_PSMCT24 && fi.TME && (fi.FBP ==0x1300 ) && (fi.TBP0 ==0x0F00 || fi.TBP0 ==0x1300 || fi.TBP0==0x2b00)) // || fi.FBP == 0x0100 - { - skip = 1; // global haze/halo - } - else if(Aggresive && fi.TPSM == PSM_PSMCT24 && fi.TME && (fi.FBP ==0x0100 ) && (fi.TBP0==0x2b00 || fi.TBP0==0x2e80)) //480P 2e80 - { - skip = 1; // water effect and water vertical lines - } - else if 
(fi.TME && (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S)) - { - // Equivalent to the UserHacks_AutoSkipDrawDepth hack but enabled by default - // http://forums.pcsx2.net/Thread-God-of-War-Red-line-rendering-explained - skip = 1; - } - } - } - else - { - if(fi.TME && (fi.FBP == 0x00100 || fi.FBP == 0x02100) && fi.FPSM == PSM_PSMCT16) - { - skip = 3; - } - } - - return true; -} - -bool GSC_GiTS(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01400 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02e40 && fi.TPSM == PSM_PSMCT16) - { - skip = 1315; - } - } - else - { - } - - return true; -} - -bool GSC_Onimusha3(const GSFrameInfo& fi, int& skip) -{ - if(fi.TME /*&& (fi.FBP == 0x00000 || fi.FBP == 0x00700)*/ && (fi.TBP0 == 0x01180 || fi.TBP0 == 0x00e00 || fi.TBP0 == 0x01000 || fi.TBP0 == 0x01200) && (fi.TPSM == PSM_PSMCT32 || fi.TPSM == PSM_PSMCT24)) - { - skip = 1; - } - - return true; -} - -bool GSC_TalesOfAbyss(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00) && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMT8) // copies the z buffer to the alpha channel of the fb - { - skip = 1000; - } - else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00) && (fi.TBP0 == 0x03560 || fi.TBP0 == 0x038e0) && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - else - { - if(fi.TME && fi.TPSM != PSM_PSMT8) - { - skip = 0; - } - } - - return true; -} - -bool GSC_SonicUnleashed(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FPSM == PSM_PSMCT16S && fi.TBP0 == 0x00000 && fi.TPSM == PSM_PSMCT16) - { - skip = 1000; // shadow - } - } - else - { - if(fi.TME && fi.FBP == 0x00000 && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMCT16S) - { - skip = 2; - } - } - - return true; -} - -bool GSC_SimpsonsGame(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == fi.TBP0 && 
fi.FPSM == fi.TPSM && fi.TBP0 == 0x03000 && fi.TPSM == PSM_PSMCT32) - { - skip = 100; - } - } - else - { - if(fi.TME && fi.FBP == 0x03000 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8H) - { - skip = 2; - } - } - - return true; -} - -bool GSC_Genji(const GSFrameInfo& fi, int& skip) -{ - if( !skip && fi.TME && (fi.FBP == 0x700 || fi.FBP == 0x0) && fi.TBP0 == 0x1500 && fi.TPSM ) - skip=1; - - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01500 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00e00 && fi.TPSM == PSM_PSMZ16) - { - // likely fixed in openGL (texture shuffle) - if (Dx_only) - skip = 6; - else - return false; - } - else if(fi.TPSM == PSM_PSMCT24 && fi.TME ==0x0001 && fi.TBP0==fi.FBP) - { - skip = 1; - } - else if(fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0) - { - skip = 1; - } - } - else - { - } - - return true; -} - -bool GSC_StarOcean3(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - // The game emulate a stencil buffer with the alpha channel of the RT - // The operation of the stencil is selected with the palette - // For example -1 wrap will be [240, 16, 32, 48 ....] - // i.e. 
p[A>>4] = (A - 16) % 256 - // - // The fastest and accurate solution will be to replace this pseudo stencil - // by a dedicated GPU draw call - // 1/ Use future GPU capabilities to do a "kind" of SW blending - // 2/ Use a real stencil/atomic image, and then compute the RT alpha value - // - // Both of those solutions will increase code complexity (and only avoid upscaling - // glitches) - - if(skip == 0) - { - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH) - { - skip = 1000; // - } - } - else - { - if(!(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH)) - { - skip = 0; - } - } - - return true; -} - -bool GSC_ValkyrieProfile2(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - /*if(fi.TME && (fi.FBP == 0x018c0 || fi.FBP == 0x02180) && fi.FPSM == fi.TPSM && fi.TBP0 >= 0x03200 && fi.TPSM == PSM_PSMCT32) //NTSC only, !(fi.TBP0 == 0x03580 || fi.TBP0 == 0x03960) - { - skip = 1; //red garbage in lost forest, removes other effects... - } - if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 1; // //garbage in cutscenes, doesn't remove completely, better use "Alpha Hack" - }*/ - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH) - { - // GH: Hack is quite similar to GSC_StarOcean3. It is potentially the same issue. - skip = 1000; // - } - } - else - { - if(!(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH)) - { - skip = 0; - } - } - - return true; -} - -bool GSC_RadiataStories(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 1; - } - else if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH) - { - // GH: Hack is quite similar to GSC_StarOcean3. It is potentially the same issue. 
- // Fixed on openGL - skip = 1000; - } - } - else - { - if(!(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH)) - { - skip = 0; - } - } - - return true; -} - -bool GSC_HauntingGround(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16S && fi.FBMSK == 0x03FFF) - { - if (Dx_only) - skip = 1; - else - return false; - } - else if(fi.TME && fi.FBP == 0x3000 && fi.TBP0 == 0x3380) - { - skip = 1; // bloom - } - else if(fi.TME && (fi.FBP ==0x2200) && (fi.TBP0 ==0x3a80) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - else if(fi.FBP ==0x2200 && fi.TBP0==0x3000 && fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0) - { - skip = 1; - } - else if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - - return true; -} - -bool GSC_EvangelionJo(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.TBP0 == 0x2BC0 || (fi.FBP == 0 || fi.FBP == 0x1180) && (fi.FPSM | fi.TPSM) == 0) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SuikodenTactics(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if( !fi.TME && fi.TPSM == PSM_PSMT8H && fi.FPSM == 0 && - fi.FBMSK == 0x0FF000000 && fi.TBP0 == 0 && GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) - { - skip = 4; - } - } - - return true; -} - -bool GSC_CaptainTsubasa(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x1C00 && !fi.FBMSK) - { - skip = 1; - } - } - return true; -} - -bool GSC_Oneechanbara2Special(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TPSM == PSM_PSMCT24 && fi.TME && fi.FBP == 0x01180) - { - skip = 1; - } - } - - return true; -} - -bool 
GSC_NarutimateAccel(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x3800 && fi.TBP0 == 0 && (fi.FPSM | fi.TPSM) == 0) - { - skip = 105; - } - else if(!fi.TME && fi.FBP == 0x3800 && fi.TBP0 == 0x1E00 && fi.FPSM == 0 && fi.TPSM == 49 && fi.FBMSK == 0xFF000000) - { - skip = 1; - } - } - else - { - if(fi.FBP == 0 && fi.TBP0 == 0x3800 && fi.TME && (fi.FPSM | fi.TPSM) == 0) - { - skip = 1; - } - } - - return true; -} - -bool GSC_Naruto(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x3800 && fi.TBP0 == 0 && (fi.FPSM | fi.TPSM) == 0) - { - skip = 105; - } - else if(!fi.TME && fi.FBP == 0x3800 && fi.TBP0 == 0x1E00 && fi.FPSM == 0 && fi.TPSM == 49 && fi.FBMSK == 0xFF000000) - { - skip = 0; - } - } - else - { - if(fi.FBP == 0 && fi.TBP0 == 0x3800 && fi.TME && (fi.FPSM | fi.TPSM) == 0) - { - skip = 1; - } - } - - return true; -} - -bool GSC_EternalPoison(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - // Texture shuffle ??? 
- if(fi.TPSM == PSM_PSMCT16S && fi.TBP0 == 0x3200) - { - skip = 1; - } - } - return true; -} - -bool GSC_LegoBatman(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(Aggresive && skip == 0) - { - if(fi.TME && fi.TPSM == PSM_PSMZ16 && fi.FPSM == PSM_PSMCT16 && fi.FBMSK == 0x00000) - { - skip = 3; - } - } - return true; -} - -bool GSC_SakuraTaisen(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && (fi.FBP == 0x0 || fi.FBP == 0x1180) && (fi.TBP0!=0x3fc0 && fi.TBP0!=0x3c9a && fi.TBP0 !=0x3dec /*fi.TBP0 ==0x38d0 || fi.TBP0==0x3912 ||fi.TBP0==0x3bdc ||fi.TBP0==0x3ab3 ||fi.TBP0<=0x3a92*/) && fi.FPSM == PSM_PSMCT32 && (fi.TPSM == PSM_PSMT8 || fi.TPSM == PSM_PSMT4) && (fi.FBMSK == 0x00FFFFFF || !fi.FBMSK)) - { - skip = 0; //3dec 3fc0 3c9a - } - if(!fi.TME && (fi.FBP | fi.TBP0) !=0 && (fi.FBP | fi.TBP0) !=0x1180 && (fi.FBP | fi.TBP0) !=0x3be0 && (fi.FBP | fi.TBP0) !=0x3c80 && fi.TBP0!=0x3c9a && (fi.FBP | fi.TBP0) !=0x3d80 && fi.TBP0 !=0x3dec&& fi.FPSM == PSM_PSMCT32 && (fi.FBMSK==0)) - { - skip =0; //3dec 3fc0 3c9a - } - if(!fi.TME && (fi.FBP | fi.TBP0) !=0 && (fi.FBP | fi.TBP0) !=0x1180 && (fi.FBP | fi.TBP0) !=0x3be0 && (fi.FBP | fi.TBP0) !=0x3c80 && (fi.FBP | fi.TBP0) !=0x3d80 && fi.TBP0!=0x3c9a && fi.TBP0 !=0x3de && fi.FPSM == PSM_PSMCT32 && (fi.FBMSK==0)) - { - skip =1; //3dec 3fc0 3c9a - } - else if(fi.TME && (fi.FBP == 0 || fi.FBP == 0x1180) && fi.TBP0 == 0x35B8 && fi.TPSM == PSM_PSMT4) - { - skip = 1; - } - else - { - if(!fi.TME && (fi.FBP | fi.TBP0) ==0x38d0 && fi.FPSM == PSM_PSMCT32 ) - { - skip = 1; //3dec 3fc0 3c9a - } - } - } - - return true; -} - -bool GSC_Tenchu(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.TPSM == PSM_PSMZ16 && fi.FPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 3; - } - } - - return true; -} - -bool GSC_Sly3(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00700 || fi.FBP == 0x00a80 || fi.FBP == 0x00e00) 
&& fi.FPSM == fi.TPSM && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x00700 || fi.TBP0 == 0x00a80 || fi.TBP0 == 0x00e00) && fi.TPSM == PSM_PSMCT16) - { - skip = 1000; - } - } - else - { - if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 3; - } - } - - return true; -} - -bool GSC_Sly2(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00700 || fi.FBP == 0x00800) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 1000; - } - } - else - { - if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 3; - } - } - - return true; -} - -bool GSC_ShadowofRome(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.FBP && fi.TPSM == PSM_PSMT8H && ( fi.FBMSK ==0x00FFFFFF)) - { - skip =1; - } - else if(fi.TME ==0x0001 && (fi.TBP0==0x1300 || fi.TBP0==0x0f00) && fi.FBMSK>=0xFFFFFF) - { - skip = 1; - } - else if(fi.TME && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 ==0x0160 ||fi.TBP0==0x01e0 || fi.TBP0<=0x0800) && fi.TPSM == PSM_PSMT8) - { - skip = 1; - } - else if(fi.TME && (fi.TBP0==0x0700) && (fi.TPSM == PSM_PSMCT32 || fi.TPSM == PSM_PSMCT24)) - { - skip = 1; - } - } - - return true; -} - -bool GSC_FFXII(const GSFrameInfo& fi, int& skip) -{ - if(Aggresive && skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - return true; -} - -bool GSC_FFX2(const GSFrameInfo& fi, int& skip) -{ - if(Aggresive && skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic 
post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - return true; -} - -bool GSC_FFX(const GSFrameInfo& fi, int& skip) -{ - if(Aggresive && skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - return true; -} - -bool GSC_DemonStone(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01400 && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT16) - { - skip = 1000; - } - } - else - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT32) - { - skip = 2; - } - } - - return true; -} - -bool GSC_BigMuthaTruckers(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00a00) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16) - { - skip = 3; - } - } - - return true; -} - -bool GSC_TimeSplitters2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x00e00 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0FF000000) - { - skip = 1; - } - } - - return true; -} - -bool GSC_LordOfTheRingsTwoTowers(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x01180 || fi.FBP == 0x01400) && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT16) - { - skip = 1000;//shadows - } - else if(fi.TME && fi.TPSM == PSM_PSMZ16 && fi.TBP0 == 0x01400 && fi.FPSM == PSM_PSMCT16 && fi.FBMSK == 0x03FFF) - { - skip = 3; //wall of fog - } - } - else - { - if(fi.TME && 
(fi.FBP == 0x00000 || fi.FBP == 0x01000) && (fi.TBP0 == 0x01180 || fi.TBP0 == 0x01400) && fi.FPSM == PSM_PSMCT32) - { - skip = 2; - } - } - - return true; -} - -bool GSC_LordOfTheRingsThirdAge(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && fi.FBP == 0x03000 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4 && fi.FBMSK == 0xFF000000) - { - skip = 1000; //shadows - } - } - else - { - if (fi.TME && (fi.FBP == 0x0 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x03000 && fi.TPSM == PSM_PSMCT24) - { - skip = 1; - } - } - - return true; -} - -bool GSC_RedDeadRevolver(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && (fi.FBP == 0x02420 || fi.FBP == 0x025e0) && fi.FPSM == PSM_PSMCT24) - { - skip = 1200; - } - else if(fi.TME && (fi.FBP == 0x00800 || fi.FBP == 0x009c0) && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x01600 || fi.TBP0 == 0x017c0) && fi.TPSM == PSM_PSMCT32) - { - skip = 2; //filter - } - else if(fi.FBP == 0x03700 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMCT24) - { - skip = 2; //blur - } - } - else - { - if(fi.TME && (fi.FBP == 0x00800 || fi.FBP == 0x009c0) && fi.FPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_HeavyMetalThunder(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x03100 && fi.FPSM == fi.TPSM && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMZ32) - { - skip = 100; - } - } - else - { - if(fi.TME && fi.FBP == 0x00e00 && fi.FPSM == fi.TPSM && fi.TBP0 == 0x02a00 && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_BleachBladeBattlers(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01180 && fi.FPSM == fi.TPSM && fi.TBP0 == 0x03fc0 && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_Castlevania(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - // This hack removes the shadows and globally darker image - // 
I think there are 2 issues on GSdx - // - // 1/ potential not correctly supported colclip. - // - // 2/ use of a 32 bits format to emulate a 16 bit formats - // For example, if you blend 64 time the value 4 on a dark destination pixels - // - // FMT32: 4*64 = 256 <= white pixels - // - // FMT16: output of blending will always be 0 because the 3 lsb of color is dropped. - // Therefore the pixel remains dark !!! - if(fi.TME && fi.FBP == 0 && fi.TBP0 && fi.TPSM == 10 && fi.FBMSK == 0xFFFFFF) - { - skip = 2; - } - } - - return true; -} - -bool GSC_Black(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - // Note: the first part of the hack must be fixed in openGL (texture shuffle). Remains the 2nd part (HasSharedBits) - if(fi.TME /*&& (fi.FBP == 0x00000 || fi.FBP == 0x008c0)*/ && fi.FPSM == PSM_PSMCT16 && (fi.TBP0 == 0x01a40 || fi.TBP0 == 0x01b80 || fi.TBP0 == 0x030c0) && fi.TPSM == PSM_PSMZ16 || (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM))) - { - skip = 5; - } - } - else - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x0a00 ) && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4) - { - skip = 0; - } - else if(!fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8H) - { - skip = 0; - } - } - - return true; -} - -bool GSC_CrashNburn(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - - return true; -} - -bool GSC_TombRaider(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01000 && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - return true; -} - -bool GSC_TombRaiderLegend(const GSFrameInfo& fi, int& skip) -{ - 
if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01000 && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32 && (fi.TBP0 == 0x2b60 ||fi.TBP0 == 0x2b80 || fi.TBP0 == 0x2E60 ||fi.TBP0 ==0x3020 ||fi.TBP0 == 0x3200 || fi.TBP0 == 0x3320)) - { - skip = 1; - } - else if(fi.TPSM == PSM_PSMCT32 && (fi.TPSM | fi.FBP)==0x2fa0 && (fi.TBP0==0x2bc0 ) && fi.FBMSK ==0) - { - skip = 2; - } - - - }// ||fi.TBP0 ==0x2F00 - - return true; -} - -bool GSC_TombRaiderUnderWorld(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01000 && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32 && (fi.TBP0 == 0x2B60 /*|| fi.TBP0 == 0x2EFF || fi.TBP0 ==0x2F00 || fi.TBP0 == 0x3020*/ || fi.TBP0 >= 0x2C01 && fi.TBP0!=0x3029 && fi.TBP0!=0x302d)) - { - skip = 1; - } - else if(fi.TPSM == PSM_PSMCT32 && (fi.TPSM | fi.FBP)==0x2c00 && (fi.TBP0 ==0x0ee0) && fi.FBMSK ==0) - { - skip = 2; - } - /*else if(fi.TPSM == PSM_PSMCT16 && (fi.TPSM | fi.FBP)>=0x0 && (fi.TBP0 >=0x0) && fi.FBMSK ==0) - { - skip = 600; - }*/ - } - - return true; -} - -bool GSC_SSX3(const GSFrameInfo& fi, int& skip) -{ - if(Aggresive && skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - - return true; -} - -bool GSC_FFVIIDoC(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x01c00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x02c00 && fi.TPSM == PSM_PSMCT24) - { - skip = 1; - } - if(!fi.TME && fi.FBP == 0x01c00 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x01c00 && fi.TPSM == PSM_PSMCT24) - { - //skip = 1; - } - } - - return true; -} - -bool GSC_DevilMayCry3(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - - if(Dx_only && fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01000 && 
fi.TPSM == PSM_PSMZ16) - { - skip = 32; - } - if(fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMZ32 && fi.TBP0 == 0x0800 && fi.TPSM == PSM_PSMT8H) - { - skip = 16; - } - if(fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x0 && fi.TPSM == PSM_PSMT8H) - { - skip = 24; - } - } - - return true; -} - -bool GSC_StarWarsForceUnleashed(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x038a0 || fi.FBP == 0x03ae0) && fi.FPSM == fi.TPSM && fi.TBP0 == 0x02300 && fi.TPSM == PSM_PSMZ24) - { - skip = 1000; //9, shadows - } - } - else - { - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x034a0 || fi.TBP0 == 0x36e0) && fi.TPSM == PSM_PSMCT16) - { - skip = 2; - } - - } - - return true; -} - -bool GSC_StarWarsBattlefront(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP > 0x0 && fi.FBP < 0x01000) && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 > 0x02000 && fi.TBP0 < 0x03000) && fi.TPSM == PSM_PSMT8) - { - skip = 1; - } - } - - return true; -} - -bool GSC_StarWarsBattlefront2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP > 0x01000 && fi.FBP < 0x02000) && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 > 0x0 && fi.TBP0 < 0x01000) && fi.TPSM == PSM_PSMT8) - { - skip = 1; - } - if(fi.TME && (fi.FBP > 0x01000 && fi.FBP < 0x02000) && fi.FPSM == PSM_PSMZ32 && (fi.TBP0 > 0x0 && fi.TBP0 < 0x01000) && fi.TPSM == PSM_PSMT8) - { - skip = 1; - } - } - - return true; -} - -bool GSC_BlackHawkDown(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(Dx_only && fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01800 && fi.TPSM == PSM_PSMZ16) - { - skip = 2; //wall of fog - } - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8) - { - skip = 5; //night filter - } - } - - return true; -} - -bool GSC_Burnout(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x01dc0 || fi.FBP == 0x02200) && 
fi.FPSM == fi.TPSM && (fi.TBP0 == 0x01dc0 || fi.TBP0 == 0x02200) && fi.TPSM == PSM_PSMCT32) - { - skip = 4; - } - else if(fi.TME && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16) //fog - { - if (!Dx_only) return false; - - if(fi.FBP == 0x00a00 && fi.TBP0 == 0x01e00) - { - skip = 4; //pal - } - if(fi.FBP == 0x008c0 && fi.TBP0 == 0x01a40) - { - skip = 3; //ntsc - } - } - else if (fi.TME && (fi.FBP == 0x02d60 || fi.FBP == 0x033a0) && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x02d60 || fi.TBP0 == 0x033a0) && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0) - { - skip = 2; //impact screen - } - } - - return true; -} - -bool GSC_MidnightClub3(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP > 0x01d00 && fi.FBP <= 0x02a00) && fi.FPSM == PSM_PSMCT32 && (fi.FBP >= 0x01600 && fi.FBP < 0x03260) && fi.TPSM == PSM_PSMT8H) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SpyroNewBeginning(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == fi.TPSM && fi.TBP0 == 0x034a0 && fi.TPSM == PSM_PSMCT16) - { - skip = 2; - } - } - - return true; -} - -bool GSC_SpyroEternalNight(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == fi.TPSM && (fi.TBP0 == 0x034a0 ||fi.TBP0 == 0x035a0 || fi.TBP0 == 0x036e0) && fi.TPSM == PSM_PSMCT16) - { - skip = 2; - } - } - - return true; -} - -bool GSC_TalesOfLegendia(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x3f80 || fi.FBP == 0x03fa0) && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT8) - { - skip = 3; //3, 9 - } - if(fi.TME && fi.FBP == 0x3800 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMZ32) - { - skip = 2; - } - if(fi.TME && fi.FBP && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x3d80) - { - skip = 1; - } - if(fi.TME && fi.FBP ==0x1c00 && (fi.TBP0==0x2e80 ||fi.TBP0==0x2d80) && fi.TPSM ==0 && fi.FBMSK == 0xff000000) - { - skip = 1; - } - if(!fi.TME && fi.FBP ==0x2a00 
&& (fi.TBP0==0x1C00 ) && fi.TPSM ==0 && fi.FBMSK == 0x00FFFFFF) - { - skip = 1; - } - } - - return true; -} - -bool GSC_NanoBreaker(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP == 0x0 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x03800 || fi.TBP0 == 0x03900) && fi.TPSM == PSM_PSMCT16S) - { - skip = 2; - } - } - - return true; -} - -bool GSC_Kunoichi(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && (fi.FBP == 0x0 || fi.FBP == 0x00700 || fi.FBP == 0x00800) && fi.FPSM == PSM_PSMCT32 && fi.FBMSK == 0x00FFFFFF) - { - skip = 3; - } - if(fi.TME && (fi.FBP ==0x0700 || fi.FBP==0) && fi.TBP0==0x0e00 && fi.TPSM ==0 && fi.FBMSK == 0) - { - skip = 1; - } - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - else - { - if(fi.TME && (fi.FBP == 0x0e00) && fi.FPSM == PSM_PSMCT32 && fi.FBMSK == 0xFF000000) - { - skip = 0; - } - } - - return true; -} - -bool GSC_Yakuza(const GSFrameInfo& fi, int& skip) -{ - if(1 - && !skip - && !fi.TME - && (0 - || fi.FBP == 0x1c20 && fi.TBP0 == 0xe00 //ntsc (EU and US DVDs) - || fi.FBP == 0x1e20 && fi.TBP0 == 0x1000 //pal1 - || fi.FBP == 0x1620 && fi.TBP0 == 0x800 //pal2 - ) - && fi.TPSM == PSM_PSMZ24 - && fi.FPSM == PSM_PSMCT32 - /* - && fi.FBMSK ==0xffffff - && fi.TZTST - && !GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM) - */ - ) - { - skip=3; - } - return true; -} - -bool GSC_Yakuza2(const GSFrameInfo& fi, int& skip) -{ - if(1 - && !skip - && !fi.TME - && (0 - || fi.FBP == 0x1c20 && fi.TBP0 == 0xe00 //ntsc (EU DVD) - || fi.FBP == 0x1e20 && fi.TBP0 == 0x1000 //pal1 - || fi.FBP == 0x1620 && fi.TBP0 == 0x800 //pal2 - ) - && fi.TPSM == PSM_PSMZ24 - && fi.FPSM == PSM_PSMCT32 - /* - && fi.FBMSK ==0xffffff - && fi.TZTST - 
&& !GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM) - */ - ) - { - skip=17; - } - return true; -} - -bool GSC_SkyGunner(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - - if(!fi.TME && !(fi.FBP == 0x0 || fi.FBP == 0x00800 || fi.FBP == 0x008c0 || fi.FBP == 0x03e00) && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x0 || fi.TBP0 == 0x01800) && fi.TPSM == PSM_PSMCT32) - { - skip = 1; //Huge Vram usage - } - } - - return true; -} - -bool GSC_JamesBondEverythingOrNothing(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - - if(fi.TME && (fi.FBP < 0x02000 && !(fi.FBP == 0x0 || fi.FBP == 0x00e00)) && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 > 0x01c00 && fi.TBP0 < 0x03000) && fi.TPSM == PSM_PSMT8) - { - skip = 1; //Huge Vram usage - } - } - - return true; -} - -bool GSC_ZettaiZetsumeiToshi2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.TPSM == PSM_PSMCT16S && (fi.FBMSK >= 0x6FFFFFFF || fi.FBMSK ==0) ) - { - skip = 1000; - } - else if(fi.TME && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0xFF000000) - { - skip = 2; - } - else if((fi.FBP | fi.TBP0)&& fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x3FFF) - { - // Note start of the effect (texture shuffle) is fixed in openGL but maybe not the extra draw - // call.... 
- skip = 1000; - } - - } - else - { - if(!fi.TME && fi.TPSM == PSM_PSMCT32 && fi.FBP==0x1180 && fi.TBP0==0x1180 && (fi.FBMSK ==0)) - { - skip = 0; // - } - if(fi.TME && fi.TPSM == PSM_PSMT4 && fi.FBP && (fi.TBP0!=0x3753)) - { - skip = 0; // - } - if(fi.TME && fi.TPSM == PSM_PSMT8H && fi.FBP ==0x22e0 && fi.TBP0 ==0x36e0 ) - { - skip = 0; // - } - if(!fi.TME && fi.TPSM == PSM_PSMT8H && fi.FBP ==0x22e0 ) - { - skip = 0; // - } - if(fi.TME && fi.TPSM == PSM_PSMT8 && (fi.FBP==0x1180 || fi.FBP==0) && (fi.TBP0 !=0x3764 && fi.TBP0!=0x370f)) - { - skip = 0; // - } - if(fi.TME && fi.TPSM == PSM_PSMCT16S && (fi.FBP==0x1180 )) - { - skip = 2; // - } - - } - - return true; -} - -bool GSC_ShinOnimusha(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - - if(fi.TME && fi.FBP == 0x001000 && (fi.TBP0 ==0 || fi.TBP0 == 0x0800) && fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0x00FFFFFF) - { - skip = 0; - } - else if(fi.TPSM == PSM_PSMCT24 && fi.TME && fi.FBP == 0x01000) // || fi.FBP == 0x00000 - { - skip = 28; //28 30 56 64 - } - else if(fi.FBP && fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0xFFFFFF) - { - skip = 0; //24 33 40 9 - } - else if(fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0xFF000000) - { - skip = 1; - } - else if(fi.TME && (fi.TBP0 ==0x1400 || fi.TBP0 ==0x1000 ||fi.TBP0 == 0x1200) && (fi.TPSM == PSM_PSMCT32 || fi.TPSM == PSM_PSMCT24)) - { - skip = 1; - } - - } - - return true; -} - -bool GSC_XE3(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TPSM == PSM_PSMT8H && fi.FBMSK >= 0xEFFFFFFF) - { - skip = 73; - } - else if(fi.TME && fi.FBP ==0x03800 && fi.TBP0 && fi.TPSM ==0 && fi.FBMSK == 0) - { - skip = 1; - } - /*else if(fi.TPSM ==0x00000 && PSM_PSMCT24 && fi.TME && fi.FBP == 0x03800) - { - skip = 1 ; - }*/ - /*else if(fi.TME ==0 && (fi.FBP ==0 ) && fi.FPSM == PSM_PSMCT32 && ( fi.TPSM == PSM_PSMT8 || fi.TPSM == PSM_PSMT4) && (fi.FBMSK == 0x00FFFFFF || fi.FBMSK == 0xFF000000)) - { - skip = 1; - }*/ - else - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 
intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 1; - } - } - } - } - return true; -} - -bool GSC_GetaWay(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if((fi.FBP ==0 || fi.FBP ==0x1180)&& fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SakuraWarsSoLongMyLove(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME==0 && fi.FBP != fi.TBP0 && fi.TBP0 && fi.FBMSK == 0x00FFFFFF) - { - skip = 3; - } - else if(fi.TME==0 && fi.FBP == fi.TBP0 && (fi.TBP0 ==0x1200 ||fi.TBP0 ==0x1180 ||fi.TBP0 ==0) && fi.FBMSK == 0x00FFFFFF) - { - skip = 3; - } - else if(fi.TME && (fi.FBP ==0 || fi.FBP ==0x1180) && fi.FPSM == PSM_PSMCT32 && fi.TBP0 ==0x3F3F && fi.TPSM == PSM_PSMT8) - { - skip = 1; - } - } - - return true; -} - -bool GSC_FightingBeautyWulong(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.TBP0 ==0x0700 || fi.TBP0 ==0x0a80) && (fi.TPSM == PSM_PSMCT32 || fi.TPSM == PSM_PSMCT24)) - { - skip = 1; - } - } - - return true; -} - -bool GSC_TouristTrophy(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP >= 0x02f00 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.TPSM == PSM_PSMT8) - { - skip = 770; - } - if(fi.TME && fi.FBP >= 0x02de0 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 ==0 || fi.TBP0==0x1a40 ||fi.TBP0 ==0x2300) && fi.TPSM == PSM_PSMT8) - { - skip = 770; //480P - } - } - - return true; -} - -bool GSC_GTASanAndreas(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP ==0x0a00 || fi.FBP ==0x08c0) && (fi.TBP0 ==0x1b80 || fi.TBP0 ==0x1a40) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_FrontMission5(const GSFrameInfo& fi, int& skip) -{ - 
if(skip == 0) - { - if(fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0) - { - skip = 1; - } - if(fi.TME && (fi.FBP ==0x1000) && (fi.TBP0 ==0x2e00 || fi.TBP0 ==0x3200) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; //fi.TBP0 ==0x1f00 - } - } - - return true; -} - -bool GSC_GodHand(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP ==0x0) && (fi.TBP0 ==0x2800) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_KnightsOfTheTemple2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TPSM == PSM_PSMT8H && fi.FBMSK == 0) - { - skip = 1; - } - else if(fi.TPSM ==0x00000 && PSM_PSMCT24 && fi.TME && (fi.FBP ==0x3400 ||fi.FBP==0x3a00)) - { - skip = 1 ; - } - } - - return true; -} - -bool GSC_UltramanFightingEvolution(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP==0x2a00 && fi.FPSM == PSM_PSMZ24 && fi.TBP0 == 0x1c00 && fi.TPSM == PSM_PSMZ24) - { - skip = 5; // blur - } - } - - return true; -} - -bool GSC_DeathByDegreesTekkenNinaWilliams(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP ==0 ) && fi.TBP0==0x34a0 && (fi.TPSM == PSM_PSMCT32)) - { - skip = 1; - } - else if((fi.FBP ==0x3500)&& fi.TPSM == PSM_PSMT8 && fi.FBMSK == 0xFFFF00FF) - { - skip = 4; - } - } - if(fi.TME) - { - if((fi.FBP | fi.TBP0 | fi.FPSM | fi.TPSM) && (fi.FBMSK == 0x00FFFFFF )) - { - skip = 1; - } - } - return true; -} - -bool GSC_AlpineRacer3(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && fi.FBP == 0 && (fi.FBMSK ==0x0001 ||fi.FBMSK == 0x00FFFFFF)) - { - skip = 2; - } - } - - return true; -} - -bool GSC_HummerBadlands(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP ==0x0a00) && (fi.TBP0 ==0x03200 || fi.TBP0==0x3700) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SengokuBasara(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) 
- { - if(fi.TME && (fi.TBP0==0x1800 ) && fi.FBMSK==0xFF000000) - { - skip = 1; - } - } - - return true; -} - -bool GSC_Grandia3(const GSFrameInfo& fi, int& skip) // DX ONLY -{ - if(skip == 0) - { - if(fi.TME && (fi.FBP ==0x0 || fi.FBP ==0x0e00) && (fi.TBP0 ==0x2a00 ||fi.TBP0==0x0e00 ||fi.TBP0==0) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - } - - - return true; -} - -bool GSC_FinalFightStreetwise(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(!fi.TME && (fi.FBP == 0 || fi.FBP == 0x08c0) && fi.FPSM == PSM_PSMCT32 && (fi.TPSM == PSM_PSMT8 || fi.TPSM == PSM_PSMT4) && fi.FBMSK == 0x00FFFFFF) - { - skip = 3; - } - } - - return true; -} - -bool GSC_TalesofSymphonia(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x2bc0 || fi.TBP0 <= 0x0200) && (fi.FBMSK==0xFF000000 ||fi.FBMSK==0x00FFFFFF)) - { - skip = 1; //fi.FBMSK==0 - } - if(fi.TME && (fi.TBP0==0x1180 || fi.TBP0==0x1a40 || fi.TBP0==0x2300) && fi.FBMSK>=0xFF000000) - { - skip = 1; - } - } - - return true; -} - -bool GSC_SoulCalibur2(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 2; - } - } - } - - return true; -} - -bool GSC_SoulCalibur3(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = 2; - } - } - } - - return true; -} - -bool GSC_Simple2000Vol114(const GSFrameInfo& fi, int& skip) -{ - if(skip 
== 0) - { - if(fi.TME==0 && (fi.FBP==0x1500) && (fi.TBP0==0x2c97 || fi.TBP0==0x2ace || fi.TBP0==0x03d0 || fi.TBP0==0x2448) && (fi.FBMSK == 0x0000)) - { - skip = 1; - } - if(fi.TME && (fi.FBP==0x0e00) && (fi.TBP0==0x1000) && (fi.FBMSK == 0x0000)) - { - skip = 1; - } - } - return true; -} - -bool GSC_UrbanReign(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - if(fi.TME && fi.FBP==0x0000 && fi.TBP0==0x3980 && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0) - { - skip = 1; - } - } - return true; -} - -bool GSC_SteambotChronicles(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - // Author: miseru99 on forums.pcsx2.net - if(fi.TME && fi.TPSM == PSM_PSMCT16S) - { - if(fi.FBP == 0x1180) - { - skip=1;//1 deletes some of the glitched effects - } - else if(fi.FBP == 0) - { - skip=100;//deletes most others(too high deletes the buggy sea completely;c, too low causes glitches to be visible) - } - else if(Aggresive && fi.FBP != 0)//Agressive CRC - { - skip=19;//"speedhack", makes the game very light, vaporized water can disappear when not looked at directly, possibly some interface still, other value to try: 6 breaks menu background, possibly nothing(?) during gameplay, but it's slower, hence not much of a speedhack anymore - } - } - } - return true; -} - -#undef Agressive - -#ifdef ENABLE_DYNAMIC_CRC_HACK - -#include -/*************************************************************************** - AutoReloadLibrary : Automatically reloads a dll if the file was modified. - Uses a temporary copy of the watched dll such that the original - can be modified while the copy is loaded and used. - - NOTE: The API is not platform specific, but current implementation is Win32. 
-***************************************************************************/ -class AutoReloadLibrary -{ -private: - string m_dllPath, m_loadedDllPath; - DWORD m_minMsBetweenProbes; - time_t m_lastFileModification; - DWORD m_lastProbe; - HMODULE m_library; - - string GetTempName() - { - string result = m_loadedDllPath + ".tmp"; //default name - TCHAR tmpPath[MAX_PATH], tmpName[MAX_PATH]; - DWORD ret = GetTempPath(MAX_PATH, tmpPath); - if(ret && ret <= MAX_PATH && GetTempFileName(tmpPath, TEXT("GSdx"), 0, tmpName)) - result = tmpName; - - return result; - }; - - void UnloadLib() - { - if( !m_library ) - return; - - FreeLibrary( m_library ); - m_library = NULL; - - // If can't delete (might happen when GSdx closes), schedule delete on reboot - if(!DeleteFile( m_loadedDllPath.c_str() ) ) - MoveFileEx( m_loadedDllPath.c_str(), NULL, MOVEFILE_DELAY_UNTIL_REBOOT ); - } - -public: - AutoReloadLibrary( const string dllPath, const int minMsBetweenProbes=100 ) - : m_minMsBetweenProbes( minMsBetweenProbes ) - , m_dllPath( dllPath ) - , m_lastFileModification( 0 ) - , m_lastProbe( 0 ) - , m_library( 0 ) - {}; - - ~AutoReloadLibrary(){ UnloadLib(); }; - - // If timeout has ellapsed, probe the dll for change, and reload if it was changed. - // If it returns true, then the dll was freed/reloaded, and any symbol addresse previously obtained is now invalid and needs to be re-obtained. - // Overhead is very low when when probe timeout has not ellapsed, and especially if current timestamp is supplied as argument. - // Note: there's no relation between the file modification date and currentMs value, so it need'nt neccessarily be an actual timestamp. - // Note: isChanged is guarenteed to return true at least once - // (even if the file doesn't exist, at which case the following GetSymbolAddress will return NULL) - bool isChanged( const DWORD currentMs=0 ) - { - DWORD current = currentMs? 
currentMs : GetTickCount(); - if( current >= m_lastProbe && ( current - m_lastProbe ) < m_minMsBetweenProbes ) - return false; - - bool firstTime = !m_lastProbe; - m_lastProbe = current; - - struct stat s; - if( stat( m_dllPath.c_str(), &s ) ) - { - // File doesn't exist or other error, unload dll - bool wasLoaded = m_library?true:false; - UnloadLib(); - return firstTime || wasLoaded; // Changed if previously loaded or the first time accessing this method (and file doesn't exist) - } - - if( m_lastFileModification == s.st_mtime ) - return false; - m_lastFileModification = s.st_mtime; - - // File modified, reload - UnloadLib(); - - if( !CopyFile( m_dllPath.c_str(), ( m_loadedDllPath = GetTempName() ).c_str(), false ) ) - return true; - - m_library = LoadLibrary( m_loadedDllPath.c_str() ); - return true; - }; - - // Return value is NULL if the dll isn't loaded (failure or doesn't exist) or if the symbol isn't found. - void* GetSymbolAddress( const char* name ){ return m_library? GetProcAddress( m_library, name ) : NULL; }; -}; - - -// Use DynamicCrcHack function from a dll which can be modified while GSdx/PCSX2 is running. -// return value is true if the call succeeded or false otherwise (If the hack could not be invoked: no dll/function/etc). -// result contains the result of the hack call. 
- -typedef uint32 (__cdecl* DynaHackType)(uint32, uint32, uint32, uint32, uint32, uint32, uint32, int32*, uint32, int32); -typedef uint32 (__cdecl* DynaHackType2)(uint32, uint32, uint32, uint32, uint32, uint32, uint32, int32*, uint32, int32, uint32); // Also accept CRC - -bool IsInvokedDynamicCrcHack( GSFrameInfo &fi, int& skip, int region, bool &result, uint32 crc ) -{ - static AutoReloadLibrary dll( DYNA_DLL_PATH ); - static DynaHackType dllFunc = NULL; - static DynaHackType2 dllFunc2 = NULL; - - if( dll.isChanged() ) - { - dllFunc = (DynaHackType)dll.GetSymbolAddress( "DynamicCrcHack" ); - dllFunc2 = (DynaHackType2)dll.GetSymbolAddress( "DynamicCrcHack2" ); - printf( "GSdx: Dynamic CRC-hacks%s: %s\n", - ((dllFunc && !dllFunc2)?" [Old dynaDLL - No CRC support]":""), - dllFunc? "Loaded OK (-> overriding internal hacks)" : - "Not available (-> using internal hacks)"); - } - - if( !dllFunc2 && !dllFunc ) - return false; - - int32 skip32 = skip; - bool hasSharedBits = GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM); - if(dllFunc2) - result = dllFunc2( fi.FBP, fi.FPSM, fi.FBMSK, fi.TBP0, fi.TPSM, fi.TZTST, (uint32)fi.TME, &skip32, (uint32)region, (uint32)(hasSharedBits?1:0), crc )?true:false; - else - result = dllFunc( fi.FBP, fi.FPSM, fi.FBMSK, fi.TBP0, fi.TPSM, fi.TZTST, (uint32)fi.TME, &skip32, (uint32)region, (uint32)(hasSharedBits?1:0) )?true:false; - skip = skip32; - - return true; -} - -#endif - -bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw) -{ - GSFrameInfo fi; - - fi.FBP = m_context->FRAME.Block(); - fi.FPSM = m_context->FRAME.PSM; - fi.FBMSK = m_context->FRAME.FBMSK; - fi.TME = PRIM->TME; - fi.TBP0 = m_context->TEX0.TBP0; - fi.TPSM = m_context->TEX0.PSM; - fi.TZTST = m_context->TEST.ZTST; - - static GetSkipCount map[CRC::TitleCount]; - - if (!m_crcinited) - { - m_crcinited = true; - - memset(map, 0, sizeof(map)); - - if (s_crc_hack_level > 1) { - map[CRC::AceCombat4] = GSC_AceCombat4; - map[CRC::AlpineRacer3] = GSC_AlpineRacer3; - 
map[CRC::BlackHawkDown] = GSC_BlackHawkDown; - map[CRC::BleachBladeBattlers] = GSC_BleachBladeBattlers; - map[CRC::BullyCC] = GSC_BullyCC; // Bully is fixed, maybe this one too? - map[CRC::BurnoutDominator] = GSC_Burnout; - map[CRC::BurnoutRevenge] = GSC_Burnout; - map[CRC::BurnoutTakedown] = GSC_Burnout; - map[CRC::CaptainTsubasa] = GSC_CaptainTsubasa; - map[CRC::CrashBandicootWoC] = GSC_CrashBandicootWoC; - map[CRC::CrashNburn] = GSC_CrashNburn; - map[CRC::DBZBT2] = GSC_DBZBT2; - map[CRC::DBZBT3] = GSC_DBZBT3; - map[CRC::DeathByDegreesTekkenNinaWilliams] = GSC_DeathByDegreesTekkenNinaWilliams; - map[CRC::DevilMayCry3] = GSC_DevilMayCry3; - map[CRC::EternalPoison] = GSC_EternalPoison; - map[CRC::EvangelionJo] = GSC_EvangelionJo; - map[CRC::FFVIIDoC] = GSC_FFVIIDoC; - map[CRC::FightingBeautyWulong] = GSC_FightingBeautyWulong; - map[CRC::FinalFightStreetwise] = GSC_FinalFightStreetwise; - map[CRC::FrontMission5] = GSC_FrontMission5; - map[CRC::Genji] = GSC_Genji; - map[CRC::GetaWayBlackMonday] = GSC_GetaWay; - map[CRC::GetaWay] = GSC_GetaWay; - map[CRC::GodHand] = GSC_GodHand; - map[CRC::GT3] = GSC_GT3; - map[CRC::GT4] = GSC_GT4; - map[CRC::GTASanAndreas] = GSC_GTASanAndreas; - map[CRC::GTConcept] = GSC_GTConcept; - map[CRC::HauntingGround] = GSC_HauntingGround; - map[CRC::HeavyMetalThunder] = GSC_HeavyMetalThunder; - map[CRC::HummerBadlands] = GSC_HummerBadlands; - map[CRC::ICO] = GSC_ICO; - map[CRC::IkkiTousen] = GSC_IkkiTousen; - map[CRC::JamesBondEverythingOrNothing] = GSC_JamesBondEverythingOrNothing; - map[CRC::KnightsOfTheTemple2] = GSC_KnightsOfTheTemple2; - map[CRC::Kunoichi] = GSC_Kunoichi; - map[CRC::LordOfTheRingsThirdAge] = GSC_LordOfTheRingsThirdAge; - map[CRC::Manhunt2] = GSC_Manhunt2; - map[CRC::MetalGearSolid3] = GSC_MetalGearSolid3; - map[CRC::MidnightClub3] = GSC_MidnightClub3; - map[CRC::NanoBreaker] = GSC_NanoBreaker; - map[CRC::NarutimateAccel] = GSC_NarutimateAccel; - map[CRC::Naruto] = GSC_Naruto; - map[CRC::Oneechanbara2Special] = 
GSC_Oneechanbara2Special; - map[CRC::Onimusha3] = GSC_Onimusha3; - map[CRC::RedDeadRevolver] = GSC_RedDeadRevolver; - map[CRC::ResidentEvil4] = GSC_ResidentEvil4; - map[CRC::SacredBlaze] = GSC_SacredBlaze; - map[CRC::SakuraTaisen] = GSC_SakuraTaisen; - map[CRC::SakuraWarsSoLongMyLove] = GSC_SakuraWarsSoLongMyLove; - map[CRC::SengokuBasara] = GSC_SengokuBasara; - map[CRC::ShadowofRome] = GSC_ShadowofRome; - map[CRC::ShinOnimusha] = GSC_ShinOnimusha; - map[CRC::Simple2000Vol114] = GSC_Simple2000Vol114; - map[CRC::SkyGunner] = GSC_SkyGunner; - map[CRC::SoulCalibur2] = GSC_SoulCalibur2; - map[CRC::SoulCalibur3] = GSC_SoulCalibur3; - map[CRC::Spartan] = GSC_Spartan; - map[CRC::StarWarsBattlefront2] = GSC_StarWarsBattlefront2; - map[CRC::StarWarsBattlefront] = GSC_StarWarsBattlefront; - map[CRC::StarWarsForceUnleashed] = GSC_StarWarsForceUnleashed; - map[CRC::SteambotChronicles] = GSC_SteambotChronicles; - map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; - map[CRC::TalesOfLegendia] = GSC_TalesOfLegendia; - map[CRC::TalesofSymphonia] = GSC_TalesofSymphonia; - map[CRC::Tekken5] = GSC_Tekken5; - map[CRC::TimeSplitters2] = GSC_TimeSplitters2; - map[CRC::TombRaiderAnniversary] = GSC_TombRaider; - map[CRC::TombRaiderLegend] = GSC_TombRaiderLegend; - map[CRC::TombRaiderUnderworld] = GSC_TombRaiderUnderWorld; - map[CRC::TouristTrophy] = GSC_TouristTrophy; - map[CRC::UltramanFightingEvolution] = GSC_UltramanFightingEvolution; - map[CRC::UrbanReign] = GSC_UrbanReign; - map[CRC::WildArms4] = GSC_WildArms4; - map[CRC::WildArms5] = GSC_WildArms5; - map[CRC::Yakuza2] = GSC_Yakuza2; - map[CRC::Yakuza] = GSC_Yakuza; - map[CRC::ZettaiZetsumeiToshi2] = GSC_ZettaiZetsumeiToshi2; - // Only Aggresive - map[CRC::FFX2] = GSC_FFX2; - map[CRC::FFX] = GSC_FFX; - map[CRC::FFXII] = GSC_FFXII; - map[CRC::SMTDDS1] = GSC_SMTNocturneDDS<0x203BA820>; - map[CRC::SMTDDS2] = GSC_SMTNocturneDDS<0x20435BF0>; - map[CRC::SMTNocturne] = GSC_SMTNocturneDDS<0x2054E870>; - map[CRC::SoTC] = GSC_SoTC; - map[CRC::SSX3] = 
GSC_SSX3; - } - - // Hack that were fixed on openGL - if (Dx_only) { - map[CRC::Bully] = GSC_Bully; - map[CRC::GodOfWar2] = GSC_GodOfWar2; - map[CRC::LordOfTheRingsTwoTowers] = GSC_LordOfTheRingsTwoTowers; - map[CRC::Okami] = GSC_Okami; - map[CRC::SimpsonsGame] = GSC_SimpsonsGame; - map[CRC::SuikodenTactics] = GSC_SuikodenTactics; - map[CRC::XE3] = GSC_XE3; - - // Not tested but must be fixed with texture shuffle - map[CRC::BigMuthaTruckers] = GSC_BigMuthaTruckers; - map[CRC::DemonStone] = GSC_DemonStone; - map[CRC::GiTS] = GSC_GiTS; - map[CRC::LegoBatman] = GSC_LegoBatman; - map[CRC::OnePieceGrandAdventure] = GSC_OnePieceGrandAdventure; - map[CRC::OnePieceGrandBattle] = GSC_OnePieceGrandBattle; - map[CRC::SFEX3] = GSC_SFEX3; - map[CRC::SpyroEternalNight] = GSC_SpyroEternalNight; - map[CRC::SpyroNewBeginning] = GSC_SpyroNewBeginning; - map[CRC::SonicUnleashed] = GSC_SonicUnleashed; - map[CRC::TenchuFS] = GSC_Tenchu; - map[CRC::TenchuWoH] = GSC_Tenchu; - - // Those games might requires accurate fbmask - map[CRC::Sly2] = GSC_Sly2; - map[CRC::Sly3] = GSC_Sly3; - - // Those games require accurate_colclip (perf) - map[CRC::CastlevaniaCoD] = GSC_Castlevania; - map[CRC::CastlevaniaLoI] = GSC_Castlevania; - map[CRC::GodOfWar] = GSC_GodOfWar; - - // Those games emulate a stencil buffer with the alpha channel of the RT (Slow) - map[CRC::RadiataStories] = GSC_RadiataStories; - map[CRC::StarOcean3] = GSC_StarOcean3; - map[CRC::ValkyrieProfile2] = GSC_ValkyrieProfile2; - - // Deprecated hack could be removed (Cutie) - map[CRC::Grandia3] = GSC_Grandia3; - - // At least a part of the CRC is fixed with texture shuffle. 
- // The status of post-processing effect is unknown - map[CRC::Black] = GSC_Black; - } - } - - // TODO: just set gsc in SetGameCRC once - - GetSkipCount gsc = map[m_game.title]; - g_crc_region = m_game.region; - -#ifdef ENABLE_DYNAMIC_CRC_HACK - bool res=false; if(IsInvokedDynamicCrcHack(fi, skip, g_crc_region, res, m_crc)){ if( !res ) return false; } else -#endif - if(gsc && !gsc(fi, skip)) - { - return false; - } - - if(skip == 0 && (UserHacks_SkipDraw > 0) ) - { - if(fi.TME) - { - // depth textures (bully, mgs3s1 intro, Front Mission 5) - if( (fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) || - // General, often problematic post processing - (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)) ) - { - skip = UserHacks_SkipDraw; - } - } - } -#ifdef ENABLE_OGL_DEBUG - else if (fi.TME) { - if(fi.TPSM == PSM_PSMZ32 || fi.TPSM == PSM_PSMZ24 || fi.TPSM == PSM_PSMZ16 || fi.TPSM == PSM_PSMZ16S) - GL_INS("!!! Depth Texture 0x%x!!!", fi.TPSM); - } -#endif - - if(skip > 0) - { - skip--; - - return true; - } - - return false; -} diff --git a/plugins/GSdx_legacy/GSState.h b/plugins/GSdx_legacy/GSState.h deleted file mode 100644 index 1005987c34..0000000000 --- a/plugins/GSdx_legacy/GSState.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSLocalMemory.h" -#include "GSDrawingContext.h" -#include "GSDrawingEnvironment.h" -#include "GSVertex.h" -#include "GSVertexTrace.h" -#include "GSUtil.h" -#include "GSPerfMon.h" -#include "GSVector.h" -#include "GSDevice.h" -#include "GSCrc.h" -#include "GSAlignedClass.h" -#include "GSDump.h" - -class GSState : public GSAlignedClass<32> -{ - // RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there) - - typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r); - - GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; - GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4]; - - void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r); - template void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r); - template void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r); - void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r); - - typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r); - - GIFRegHandler m_fpGIFRegHandlers[256]; - GIFRegHandler m_fpGIFRegHandlerXYZ[8][4]; - - typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, uint32 size); - - GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2]; - GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8]; - GIFPackedRegHandlerC 
m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8]; - - template void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size); - template void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size); - void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size); - - template void ApplyTEX0(GIFRegTEX0& TEX0); - void ApplyPRIM(uint32 prim); - - void GIFRegHandlerNull(const GIFReg* RESTRICT r); - void GIFRegHandlerPRIM(const GIFReg* RESTRICT r); - void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r); - void GIFRegHandlerST(const GIFReg* RESTRICT r); - void GIFRegHandlerUV(const GIFReg* RESTRICT r); - void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r); - template void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r); - template void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r); - template void GIFRegHandlerTEX0(const GIFReg* RESTRICT r); - template void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r); - void GIFRegHandlerFOG(const GIFReg* RESTRICT r); - void GIFRegHandlerNOP(const GIFReg* RESTRICT r); - template void GIFRegHandlerTEX1(const GIFReg* RESTRICT r); - template void GIFRegHandlerTEX2(const GIFReg* RESTRICT r); - template void GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r); - void GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r); - void GIFRegHandlerPRMODE(const GIFReg* RESTRICT r); - void GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r); - void GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r); - template void GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r); - template void GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r); - void GIFRegHandlerTEXA(const GIFReg* RESTRICT r); - void GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r); - void GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r); - template void GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r); - template void GIFRegHandlerALPHA(const GIFReg* RESTRICT r); - void GIFRegHandlerDIMX(const GIFReg* RESTRICT r); - void GIFRegHandlerDTHE(const GIFReg* RESTRICT r); - void 
GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r); - template void GIFRegHandlerTEST(const GIFReg* RESTRICT r); - void GIFRegHandlerPABE(const GIFReg* RESTRICT r); - template void GIFRegHandlerFBA(const GIFReg* RESTRICT r); - template void GIFRegHandlerFRAME(const GIFReg* RESTRICT r); - template void GIFRegHandlerZBUF(const GIFReg* RESTRICT r); - void GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r); - void GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r); - void GIFRegHandlerTRXREG(const GIFReg* RESTRICT r); - void GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r); - void GIFRegHandlerHWREG(const GIFReg* RESTRICT r); - void GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r); - void GIFRegHandlerFINISH(const GIFReg* RESTRICT r); - void GIFRegHandlerLABEL(const GIFReg* RESTRICT r); - - int m_version; - int m_sssize; - - bool m_mt; - void (*m_irq)(); - bool m_path3hack; - bool m_init_read_fifo_supported; - - struct GSTransferBuffer - { - int x, y; - int start, end, total; - bool overflow; - uint8* buff; - - GSTransferBuffer(); - virtual ~GSTransferBuffer(); - - void Init(int tx, int ty); - bool Update(int tw, int th, int bpp, int& len); - - } m_tr; - -protected: - bool IsBadFrame(int& skip, int UserHacks_SkipDraw); - - int UserHacks_WildHack; - bool isPackedUV_HackFlag; - int m_crc_hack_level; - - GSVertex m_v; - float m_q; - GSVector4i m_scissor; - GSVector4i m_ofxy; - bool m_texflush; - - struct - { - GSVertex* buff; - size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 - size_t xy_tail; - uint64 xy[4]; - } m_vertex; - - struct - { - uint32* buff; - size_t tail; - } m_index; - - void UpdateContext(); - void UpdateScissor(); - - virtual void UpdateVertexKick(); - - void GrowVertexBuffer(); - - template - void VertexKick(uint32 skip); - - // following functions need m_vt to be initialized - - GSVertexTrace m_vt; - - void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); - void 
GetAlphaMinMax(); - bool TryAlphaTest(uint32& fm, uint32& zm); - bool IsOpaque(); - bool IsMipMapActive(); - -public: - GIFPath m_path[4]; - GIFRegPRIM* PRIM; - GSPrivRegSet* m_regs; - GSLocalMemory m_mem; - GSDrawingEnvironment m_env; - GSDrawingContext* m_context; - GSPerfMon m_perfmon; - uint32 m_crc; - int m_options; - int m_frameskip; - bool m_crcinited; - bool m_framelimit; - CRC::Game m_game; - GSDump m_dump; - bool m_nativeres; - bool m_mipmap; - - int s_n; - bool s_dump; - bool s_save; - bool s_savet; - bool s_savez; - bool s_savef; - int s_saven; - int s_savel; - -public: - GSState(); - virtual ~GSState(); - - void ResetHandlers(); - - GSVector4i GetDisplayRect(int i = -1); - GSVector4i GetFrameRect(int i = -1); - GSVector2i GetDeviceSize(int i = -1); - - bool IsEnabled(int i); - - float GetTvRefreshRate(); - - virtual void Reset(); - virtual void Flush(); - virtual void FlushPrim(); - virtual void FlushWrite(); - virtual void Draw() = 0; - virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {} - virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {} - - void Move(); - void Write(const uint8* mem, int len); - void Read(uint8* mem, int len); - void InitReadFIFO(uint8* mem, int len); - - void SoftReset(uint32 mask); - void WriteCSR(uint32 csr) {m_regs->CSR.u32[1] = csr;} - void ReadFIFO(uint8* mem, int size); - template void Transfer(const uint8* mem, uint32 size); - int Freeze(GSFreezeData* fd, bool sizeonly); - int Defrost(const GSFreezeData* fd); - void GetLastTag(uint32* tag) {*tag = m_path3hack; m_path3hack = 0;} - virtual void SetGameCRC(uint32 crc, int options); - void SetFrameSkip(int skip); - void SetRegsMem(uint8* basemem); - void SetIrqCallback(void (*irq)()); - void SetMultithreaded(bool mt = true); -}; - diff --git a/plugins/GSdx_legacy/GSTables.cpp b/plugins/GSdx_legacy/GSTables.cpp deleted file mode 100644 index aad7360eae..0000000000 --- 
a/plugins/GSdx_legacy/GSTables.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTables.h" - -const uint8 blockTable32[4][8] = -{ - { 0, 1, 4, 5, 16, 17, 20, 21}, - { 2, 3, 6, 7, 18, 19, 22, 23}, - { 8, 9, 12, 13, 24, 25, 28, 29}, - { 10, 11, 14, 15, 26, 27, 30, 31} -}; - -const uint8 blockTable32Z[4][8] = -{ - { 24, 25, 28, 29, 8, 9, 12, 13}, - { 26, 27, 30, 31, 10, 11, 14, 15}, - { 16, 17, 20, 21, 0, 1, 4, 5}, - { 18, 19, 22, 23, 2, 3, 6, 7} -}; - -const uint8 blockTable16[8][4] = -{ - { 0, 2, 8, 10 }, - { 1, 3, 9, 11 }, - { 4, 6, 12, 14 }, - { 5, 7, 13, 15 }, - { 16, 18, 24, 26 }, - { 17, 19, 25, 27 }, - { 20, 22, 28, 30 }, - { 21, 23, 29, 31 } -}; - -const uint8 blockTable16S[8][4] = -{ - { 0, 2, 16, 18 }, - { 1, 3, 17, 19 }, - { 8, 10, 24, 26 }, - { 9, 11, 25, 27 }, - { 4, 6, 20, 22 }, - { 5, 7, 21, 23 }, - { 12, 14, 28, 30 }, - { 13, 15, 29, 31 } -}; - -const uint8 blockTable16Z[8][4] = -{ - { 24, 26, 16, 18 }, - { 25, 27, 17, 19 }, - { 28, 30, 20, 22 }, - { 29, 31, 21, 23 }, - { 8, 10, 0, 2 }, - { 9, 11, 1, 3 }, - { 12, 14, 4, 6 }, - { 13, 15, 5, 7 } -}; - -const uint8 blockTable16SZ[8][4] = -{ - 
{ 24, 26, 8, 10 }, - { 25, 27, 9, 11 }, - { 16, 18, 0, 2 }, - { 17, 19, 1, 3 }, - { 28, 30, 12, 14 }, - { 29, 31, 13, 15 }, - { 20, 22, 4, 6 }, - { 21, 23, 5, 7 } -}; - -const uint8 blockTable8[4][8] = -{ - { 0, 1, 4, 5, 16, 17, 20, 21}, - { 2, 3, 6, 7, 18, 19, 22, 23}, - { 8, 9, 12, 13, 24, 25, 28, 29}, - { 10, 11, 14, 15, 26, 27, 30, 31} -}; - -const uint8 blockTable4[8][4] = -{ - { 0, 2, 8, 10 }, - { 1, 3, 9, 11 }, - { 4, 6, 12, 14 }, - { 5, 7, 13, 15 }, - { 16, 18, 24, 26 }, - { 17, 19, 25, 27 }, - { 20, 22, 28, 30 }, - { 21, 23, 29, 31 } -}; - -const uint8 columnTable32[8][8] = -{ - { 0, 1, 4, 5, 8, 9, 12, 13 }, - { 2, 3, 6, 7, 10, 11, 14, 15 }, - { 16, 17, 20, 21, 24, 25, 28, 29 }, - { 18, 19, 22, 23, 26, 27, 30, 31 }, - { 32, 33, 36, 37, 40, 41, 44, 45 }, - { 34, 35, 38, 39, 42, 43, 46, 47 }, - { 48, 49, 52, 53, 56, 57, 60, 61 }, - { 50, 51, 54, 55, 58, 59, 62, 63 }, -}; - -const uint8 columnTable16[8][16] = -{ - { 0, 2, 8, 10, 16, 18, 24, 26, - 1, 3, 9, 11, 17, 19, 25, 27 }, - { 4, 6, 12, 14, 20, 22, 28, 30, - 5, 7, 13, 15, 21, 23, 29, 31 }, - { 32, 34, 40, 42, 48, 50, 56, 58, - 33, 35, 41, 43, 49, 51, 57, 59 }, - { 36, 38, 44, 46, 52, 54, 60, 62, - 37, 39, 45, 47, 53, 55, 61, 63 }, - { 64, 66, 72, 74, 80, 82, 88, 90, - 65, 67, 73, 75, 81, 83, 89, 91 }, - { 68, 70, 76, 78, 84, 86, 92, 94, - 69, 71, 77, 79, 85, 87, 93, 95 }, - { 96, 98, 104, 106, 112, 114, 120, 122, - 97, 99, 105, 107, 113, 115, 121, 123 }, - { 100, 102, 108, 110, 116, 118, 124, 126, - 101, 103, 109, 111, 117, 119, 125, 127 }, -}; - -const uint8 columnTable8[16][16] = -{ - { 0, 4, 16, 20, 32, 36, 48, 52, // column 0 - 2, 6, 18, 22, 34, 38, 50, 54 }, - { 8, 12, 24, 28, 40, 44, 56, 60, - 10, 14, 26, 30, 42, 46, 58, 62 }, - { 33, 37, 49, 53, 1, 5, 17, 21, - 35, 39, 51, 55, 3, 7, 19, 23 }, - { 41, 45, 57, 61, 9, 13, 25, 29, - 43, 47, 59, 63, 11, 15, 27, 31 }, - { 96, 100, 112, 116, 64, 68, 80, 84, // column 1 - 98, 102, 114, 118, 66, 70, 82, 86 }, - { 104, 108, 120, 124, 72, 76, 88, 92, - 106, 
110, 122, 126, 74, 78, 90, 94 }, - { 65, 69, 81, 85, 97, 101, 113, 117, - 67, 71, 83, 87, 99, 103, 115, 119 }, - { 73, 77, 89, 93, 105, 109, 121, 125, - 75, 79, 91, 95, 107, 111, 123, 127 }, - { 128, 132, 144, 148, 160, 164, 176, 180, // column 2 - 130, 134, 146, 150, 162, 166, 178, 182 }, - { 136, 140, 152, 156, 168, 172, 184, 188, - 138, 142, 154, 158, 170, 174, 186, 190 }, - { 161, 165, 177, 181, 129, 133, 145, 149, - 163, 167, 179, 183, 131, 135, 147, 151 }, - { 169, 173, 185, 189, 137, 141, 153, 157, - 171, 175, 187, 191, 139, 143, 155, 159 }, - { 224, 228, 240, 244, 192, 196, 208, 212, // column 3 - 226, 230, 242, 246, 194, 198, 210, 214 }, - { 232, 236, 248, 252, 200, 204, 216, 220, - 234, 238, 250, 254, 202, 206, 218, 222 }, - { 193, 197, 209, 213, 225, 229, 241, 245, - 195, 199, 211, 215, 227, 231, 243, 247 }, - { 201, 205, 217, 221, 233, 237, 249, 253, - 203, 207, 219, 223, 235, 239, 251, 255 }, -}; - -const uint16 columnTable4[16][32] = -{ - { 0, 8, 32, 40, 64, 72, 96, 104, // column 0 - 2, 10, 34, 42, 66, 74, 98, 106, - 4, 12, 36, 44, 68, 76, 100, 108, - 6, 14, 38, 46, 70, 78, 102, 110 }, - { 16, 24, 48, 56, 80, 88, 112, 120, - 18, 26, 50, 58, 82, 90, 114, 122, - 20, 28, 52, 60, 84, 92, 116, 124, - 22, 30, 54, 62, 86, 94, 118, 126 }, - { 65, 73, 97, 105, 1, 9, 33, 41, - 67, 75, 99, 107, 3, 11, 35, 43, - 69, 77, 101, 109, 5, 13, 37, 45, - 71, 79, 103, 111, 7, 15, 39, 47 }, - { 81, 89, 113, 121, 17, 25, 49, 57, - 83, 91, 115, 123, 19, 27, 51, 59, - 85, 93, 117, 125, 21, 29, 53, 61, - 87, 95, 119, 127, 23, 31, 55, 63 }, - { 192, 200, 224, 232, 128, 136, 160, 168, // column 1 - 194, 202, 226, 234, 130, 138, 162, 170, - 196, 204, 228, 236, 132, 140, 164, 172, - 198, 206, 230, 238, 134, 142, 166, 174 }, - { 208, 216, 240, 248, 144, 152, 176, 184, - 210, 218, 242, 250, 146, 154, 178, 186, - 212, 220, 244, 252, 148, 156, 180, 188, - 214, 222, 246, 254, 150, 158, 182, 190 }, - { 129, 137, 161, 169, 193, 201, 225, 233, - 131, 139, 163, 171, 195, 203, 227, 235, - 
133, 141, 165, 173, 197, 205, 229, 237, - 135, 143, 167, 175, 199, 207, 231, 239 }, - { 145, 153, 177, 185, 209, 217, 241, 249, - 147, 155, 179, 187, 211, 219, 243, 251, - 149, 157, 181, 189, 213, 221, 245, 253, - 151, 159, 183, 191, 215, 223, 247, 255 }, - { 256, 264, 288, 296, 320, 328, 352, 360, // column 2 - 258, 266, 290, 298, 322, 330, 354, 362, - 260, 268, 292, 300, 324, 332, 356, 364, - 262, 270, 294, 302, 326, 334, 358, 366 }, - { 272, 280, 304, 312, 336, 344, 368, 376, - 274, 282, 306, 314, 338, 346, 370, 378, - 276, 284, 308, 316, 340, 348, 372, 380, - 278, 286, 310, 318, 342, 350, 374, 382 }, - { 321, 329, 353, 361, 257, 265, 289, 297, - 323, 331, 355, 363, 259, 267, 291, 299, - 325, 333, 357, 365, 261, 269, 293, 301, - 327, 335, 359, 367, 263, 271, 295, 303 }, - { 337, 345, 369, 377, 273, 281, 305, 313, - 339, 347, 371, 379, 275, 283, 307, 315, - 341, 349, 373, 381, 277, 285, 309, 317, - 343, 351, 375, 383, 279, 287, 311, 319 }, - { 448, 456, 480, 488, 384, 392, 416, 424, // column 3 - 450, 458, 482, 490, 386, 394, 418, 426, - 452, 460, 484, 492, 388, 396, 420, 428, - 454, 462, 486, 494, 390, 398, 422, 430 }, - { 464, 472, 496, 504, 400, 408, 432, 440, - 466, 474, 498, 506, 402, 410, 434, 442, - 468, 476, 500, 508, 404, 412, 436, 444, - 470, 478, 502, 510, 406, 414, 438, 446 }, - { 385, 393, 417, 425, 449, 457, 481, 489, - 387, 395, 419, 427, 451, 459, 483, 491, - 389, 397, 421, 429, 453, 461, 485, 493, - 391, 399, 423, 431, 455, 463, 487, 495 }, - { 401, 409, 433, 441, 465, 473, 497, 505, - 403, 411, 435, 443, 467, 475, 499, 507, - 405, 413, 437, 445, 469, 477, 501, 509, - 407, 415, 439, 447, 471, 479, 503, 511 }, -}; - -const uint8 clutTableT32I8[128] = -{ - 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, - 64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79, - 16, 17, 20, 21, 24, 25, 28, 29, 18, 19, 22, 23, 26, 27, 30, 31, - 80, 81, 84, 85, 88, 89, 92, 93, 82, 83, 86, 87, 90, 91, 94, 95, - 32, 33, 36, 37, 40, 41, 44, 45, 34, 35, 38, 
39, 42, 43, 46, 47, - 96, 97, 100, 101, 104, 105, 108, 109, 98, 99, 102, 103, 106, 107, 110, 111, - 48, 49, 52, 53, 56, 57, 60, 61, 50, 51, 54, 55, 58, 59, 62, 63, - 112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127 -}; - -const uint8 clutTableT32I4[16] = -{ - 0, 1, 4, 5, 8, 9, 12, 13, - 2, 3, 6, 7, 10, 11, 14, 15 -}; - -const uint8 clutTableT16I8[32] = -{ - 0, 2, 8, 10, 16, 18, 24, 26, - 4, 6, 12, 14, 20, 22, 28, 30, - 1, 3, 9, 11, 17, 19, 25, 27, - 5, 7, 13, 15, 21, 23, 29, 31 -}; - -const uint8 clutTableT16I4[16] = -{ - 0, 2, 8, 10, 16, 18, 24, 26, - 4, 6, 12, 14, 20, 22, 28, 30 -}; diff --git a/plugins/GSdx_legacy/GSTables.h b/plugins/GSdx_legacy/GSTables.h deleted file mode 100644 index cd05929557..0000000000 --- a/plugins/GSdx_legacy/GSTables.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -extern const uint8 blockTable32[4][8]; -extern const uint8 blockTable32Z[4][8]; -extern const uint8 blockTable16[8][4]; -extern const uint8 blockTable16S[8][4]; -extern const uint8 blockTable16Z[8][4]; -extern const uint8 blockTable16SZ[8][4]; -extern const uint8 blockTable8[4][8]; -extern const uint8 blockTable4[8][4]; -extern const uint8 columnTable32[8][8]; -extern const uint8 columnTable16[8][16]; -extern const uint8 columnTable8[16][16]; -extern const uint16 columnTable4[16][32]; -extern const uint8 clutTableT32I8[128]; -extern const uint8 clutTableT32I4[16]; -extern const uint8 clutTableT16I8[32]; -extern const uint8 clutTableT16I4[16]; diff --git a/plugins/GSdx_legacy/GSTexture.cpp b/plugins/GSdx_legacy/GSTexture.cpp deleted file mode 100644 index 9460deb5bf..0000000000 --- a/plugins/GSdx_legacy/GSTexture.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTexture.h" - -GSTexture::GSTexture() - : m_scale(1, 1) - , m_size(0, 0) - , m_type(0) - , m_format(0) - , m_msaa(false) - , last_frame_used(0) - , LikelyOffset(false) - , OffsetHack_modx(0.0f) - , OffsetHack_mody(0.0f) -{ -} diff --git a/plugins/GSdx_legacy/GSTexture.h b/plugins/GSdx_legacy/GSTexture.h deleted file mode 100644 index 5418cfebbb..0000000000 --- a/plugins/GSdx_legacy/GSTexture.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSVector.h" - -class GSTexture -{ -protected: - GSVector2 m_scale; - GSVector2i m_size; - int m_type; - int m_format; - bool m_msaa; - -public: - struct GSMap {uint8* bits; int pitch;}; - - enum {RenderTarget = 1, DepthStencil, Texture, Offscreen, Backbuffer}; - -public: - GSTexture(); - virtual ~GSTexture() {} - - virtual operator bool() {ASSERT(0); return false;} - - virtual bool Update(const GSVector4i& r, const void* data, int pitch) = 0; - virtual bool Map(GSMap& m, const GSVector4i* r = NULL) = 0; - virtual void Unmap() = 0; - virtual bool Save(const string& fn, bool user_image = false, bool dds = false) = 0; - virtual void Invalidate() {} - virtual uint32 GetID() { return 0; } - - GSVector2 GetScale() const {return m_scale;} - void SetScale(const GSVector2& scale) {m_scale = scale;} - - int GetWidth() const {return m_size.x;} - int GetHeight() const {return m_size.y;} - GSVector2i GetSize() const {return m_size;} - - int GetType() const {return m_type;} - int GetFormat() const {return m_format;} - - bool IsMSAA() const {return m_msaa;} - - // frame number (arbitrary base) the texture was recycled on - // different purpose than texture cache ages, do not attempt to merge - unsigned last_frame_used; - - bool LikelyOffset; - float OffsetHack_modx; - float OffsetHack_mody; - - // Typical size of a RGBA texture - virtual uint32 GetMemUsage() { return m_size.x * m_size.y * 4; } -}; diff --git a/plugins/GSdx_legacy/GSTexture11.cpp b/plugins/GSdx_legacy/GSTexture11.cpp deleted file mode 100644 index 2e4cbf557b..0000000000 --- a/plugins/GSdx_legacy/GSTexture11.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any 
later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTexture11.h" -#include "GSPng.h" - -GSTexture11::GSTexture11(ID3D11Texture2D* texture) - : m_texture(texture) -{ - ASSERT(m_texture); - - m_texture->GetDevice(&m_dev); - m_texture->GetDesc(&m_desc); - - m_dev->GetImmediateContext(&m_ctx); - - m_size.x = (int)m_desc.Width; - m_size.y = (int)m_desc.Height; - - if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget; - else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil; - else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture; - else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen; - - m_format = (int)m_desc.Format; - - m_msaa = m_desc.SampleDesc.Count > 1; -} - -bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch) -{ - if(m_dev && m_texture) - { - D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1}; - - m_ctx->UpdateSubresource(m_texture, 0, &box, data, pitch, 0); - - return true; - } - - return false; -} - -bool GSTexture11::Map(GSMap& m, const GSVector4i* r) -{ - if(r != NULL) - { - // ASSERT(0); // not implemented - - return false; - } - - if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING) - { - D3D11_MAPPED_SUBRESOURCE map; - - if(SUCCEEDED(m_ctx->Map(m_texture, 0, D3D11_MAP_READ_WRITE, 0, &map))) - { - m.bits = (uint8*)map.pData; - m.pitch = (int)map.RowPitch; - - return true; - } - } - - return false; -} - -void GSTexture11::Unmap() 
-{ - if(m_texture) - { - m_ctx->Unmap(m_texture, 0); - } -} - -bool GSTexture11::Save(const string& fn, bool user_image, bool dds) -{ - CComPtr res; - D3D11_TEXTURE2D_DESC desc; - - m_texture->GetDesc(&desc); - - desc.Usage = D3D11_USAGE_STAGING; - desc.BindFlags = 0; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - - HRESULT hr = m_dev->CreateTexture2D(&desc, nullptr, &res); - if (FAILED(hr)) - { - return false; - } - - m_ctx->CopyResource(res, m_texture); - - if (m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) - { - CComPtr dst; - - desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - desc.CPUAccessFlags |= D3D11_CPU_ACCESS_WRITE; - - hr = m_dev->CreateTexture2D(&desc, nullptr, &dst); - if (FAILED(hr)) - { - return false; - } - - D3D11_MAPPED_SUBRESOURCE sm, dm; - - hr = m_ctx->Map(res, 0, D3D11_MAP_READ, 0, &sm); - if (FAILED(hr)) - { - return false; - } - hr = m_ctx->Map(dst, 0, D3D11_MAP_WRITE, 0, &dm); - if (FAILED(hr)) - { - m_ctx->Unmap(res, 0); - return false; - } - - uint8* s = static_cast(sm.pData); - uint8* d = static_cast(dm.pData); - - for (uint32 y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch) - { - for (uint32 x = 0; x < desc.Width; x++) - { - reinterpret_cast(d)[x] = static_cast(ldexpf(reinterpret_cast(s)[x*2], 32)); - } - } - - m_ctx->Unmap(res, 0); - m_ctx->Unmap(dst, 0); - - res = dst; - } - - res->GetDesc(&desc); - - GSPng::Format format; - switch (desc.Format) - { - case DXGI_FORMAT_A8_UNORM: - format = GSPng::R8I_PNG; - break; - case DXGI_FORMAT_R8G8B8A8_UNORM: - format = dds ? GSPng::RGBA_PNG : (m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL ? GSPng::RGB_A_PNG : GSPng::RGB_PNG); - break; - default: - fprintf(stderr, "DXGI_FORMAT %d not saved to image\n", desc.Format); - return false; - } - - D3D11_MAPPED_SUBRESOURCE sm; - hr = m_ctx->Map(res, 0, D3D11_MAP_READ, 0, &sm); - if (FAILED(hr)) - { - return false; - } - - int compression = user_image ? 
Z_BEST_COMPRESSION : theApp.GetConfig("png_compression_level", Z_BEST_SPEED); - bool success = GSPng::Save(format, fn, static_cast(sm.pData), desc.Width, desc.Height, sm.RowPitch, compression); - - m_ctx->Unmap(res, 0); - - return success; -} - -GSTexture11::operator ID3D11Texture2D*() -{ - return m_texture; -} - -GSTexture11::operator ID3D11ShaderResourceView*() -{ - if(!m_srv && m_dev && m_texture) - { - ASSERT(!m_msaa); - - m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv); - } - - return m_srv; -} - -GSTexture11::operator ID3D11UnorderedAccessView*() -{ - if(!m_uav && m_dev && m_texture) - { - ASSERT(!m_msaa); - - m_dev->CreateUnorderedAccessView(m_texture, NULL, &m_uav); - } - - return m_uav; -} - -GSTexture11::operator ID3D11RenderTargetView*() -{ - ASSERT(m_dev); - - if(!m_rtv && m_dev && m_texture) - { - m_dev->CreateRenderTargetView(m_texture, NULL, &m_rtv); - } - - return m_rtv; -} - -GSTexture11::operator ID3D11DepthStencilView*() -{ - if(!m_dsv && m_dev && m_texture) - { - m_dev->CreateDepthStencilView(m_texture, NULL, &m_dsv); - } - - return m_dsv; -} diff --git a/plugins/GSdx_legacy/GSTexture11.h b/plugins/GSdx_legacy/GSTexture11.h deleted file mode 100644 index 8b9640e069..0000000000 --- a/plugins/GSdx_legacy/GSTexture11.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTexture.h" - -class GSTexture11 : public GSTexture -{ - CComPtr m_dev; - CComPtr m_ctx; - CComPtr m_texture; - D3D11_TEXTURE2D_DESC m_desc; - CComPtr m_srv; - CComPtr m_uav; - CComPtr m_rtv; - CComPtr m_dsv; - -public: - explicit GSTexture11(ID3D11Texture2D* texture); - - bool Update(const GSVector4i& r, const void* data, int pitch); - bool Map(GSMap& m, const GSVector4i* r); - void Unmap(); - bool Save(const string& fn, bool user_image = false, bool dds = false); - - operator ID3D11Texture2D*(); - operator ID3D11ShaderResourceView*(); - operator ID3D11UnorderedAccessView*(); - operator ID3D11RenderTargetView*(); - operator ID3D11DepthStencilView*(); -}; diff --git a/plugins/GSdx_legacy/GSTexture9.cpp b/plugins/GSdx_legacy/GSTexture9.cpp deleted file mode 100644 index e322c2ee37..0000000000 --- a/plugins/GSdx_legacy/GSTexture9.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTexture9.h" -#include "GSPng.h" - -GSTexture9::GSTexture9(IDirect3DSurface9* surface) -{ - m_surface = surface; - - surface->GetDevice(&m_dev); - surface->GetDesc(&m_desc); - - if(m_desc.Type != D3DRTYPE_SURFACE) - { - surface->GetContainer(__uuidof(IDirect3DTexture9), (void**)&m_texture); - - ASSERT(m_texture != NULL); - } - - m_size.x = (int)m_desc.Width; - m_size.y = (int)m_desc.Height; - - if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; - else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; - else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; - else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; - - m_format = (int)m_desc.Format; - - m_msaa = m_desc.MultiSampleType != D3DMULTISAMPLE_NONE; -} - -GSTexture9::GSTexture9(IDirect3DTexture9* texture) -{ - m_texture = texture; - - texture->GetDevice(&m_dev); - texture->GetLevelDesc(0, &m_desc); - texture->GetSurfaceLevel(0, &m_surface); - - ASSERT(m_surface != NULL); - - m_size.x = (int)m_desc.Width; - m_size.y = (int)m_desc.Height; - - if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; - else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; - else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; - else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; - - m_format = (int)m_desc.Format; - - m_msaa = m_desc.MultiSampleType > 1; -} - -GSTexture9::~GSTexture9() -{ -} - -bool GSTexture9::Update(const GSVector4i& r, const void* data, int pitch) -{ - if(m_surface) - { - D3DLOCKED_RECT lr; - - if(SUCCEEDED(m_surface->LockRect(&lr, r, 0))) - { - uint8* src = (uint8*)data; - uint8* dst = (uint8*)lr.pBits; - - int bytes = r.width() * sizeof(uint32); - - switch(m_desc.Format) - { - case D3DFMT_A8: bytes >>= 2; break; - case D3DFMT_A1R5G5B5: bytes >>= 1; break; - default: ASSERT(m_desc.Format == D3DFMT_A8R8G8B8); break; - } - - bytes = min(bytes, pitch); - 
bytes = min(bytes, lr.Pitch); - - for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += lr.Pitch) - { - memcpy(dst, src, bytes); - } - - m_surface->UnlockRect(); - - return true; - } - } - - return false; -} - -bool GSTexture9::Map(GSMap& m, const GSVector4i* r) -{ - HRESULT hr; - - if(m_surface) - { - D3DLOCKED_RECT lr; - - if(SUCCEEDED(hr = m_surface->LockRect(&lr, (LPRECT)r, 0))) - { - m.bits = (uint8*)lr.pBits; - m.pitch = (int)lr.Pitch; - - return true; - } - } - - return false; -} - -void GSTexture9::Unmap() -{ - if(m_surface) - { - m_surface->UnlockRect(); - } -} - -bool GSTexture9::Save(const string& fn, bool user_image, bool dds) -{ - bool rb_swapped = true; - CComPtr surface; - - D3DSURFACE_DESC desc; - m_surface->GetDesc(&desc); - - if (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL && desc.Format != D3DFMT_D32F_LOCKABLE) - { - return false; - } - - if (desc.Format == D3DFMT_A8 || desc.Pool == D3DPOOL_MANAGED || desc.Usage == D3DUSAGE_DEPTHSTENCIL) - { - surface = m_surface; - rb_swapped = false; - } - else - { - HRESULT hr; - - hr = m_dev->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &surface, nullptr); - if (FAILED(hr)) - { - return false; - } - - hr = m_dev->GetRenderTargetData(m_surface, surface); - if (FAILED(hr)) - { - return false; - } - } - - GSPng::Format format; - switch (desc.Format) - { - case D3DFMT_A8: - format = GSPng::R8I_PNG; - break; - case D3DFMT_A8R8G8B8: - format = dds? GSPng::RGBA_PNG : GSPng::RGB_PNG; - break; - case D3DFMT_D32F_LOCKABLE: - format = GSPng::RGB_A_PNG; - break; - default: - fprintf(stderr, "D3DFMT %d not saved to image\n", desc.Format); - return false; - } - - D3DLOCKED_RECT slr; - HRESULT hr = surface->LockRect(&slr, nullptr, 0); - if (FAILED(hr)) - { - return false; - } - - int compression = user_image ? 
Z_BEST_COMPRESSION : theApp.GetConfig("png_compression_level", Z_BEST_SPEED); - bool success = GSPng::Save(format, fn, static_cast(slr.pBits), desc.Width, desc.Height, slr.Pitch, compression, rb_swapped); - - surface->UnlockRect(); - return success; -} - -GSTexture9::operator IDirect3DSurface9*() -{ - return m_surface; -} - -GSTexture9::operator IDirect3DTexture9*() -{ - return m_texture; -} diff --git a/plugins/GSdx_legacy/GSTexture9.h b/plugins/GSdx_legacy/GSTexture9.h deleted file mode 100644 index 347734fd50..0000000000 --- a/plugins/GSdx_legacy/GSTexture9.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTexture.h" - -class GSTexture9 : public GSTexture -{ - CComPtr m_dev; - CComPtr m_surface; - CComPtr m_texture; - D3DSURFACE_DESC m_desc; - -public: - explicit GSTexture9(IDirect3DSurface9* surface); - explicit GSTexture9(IDirect3DTexture9* texture); - virtual ~GSTexture9(); - - bool Update(const GSVector4i& r, const void* data, int pitch); - bool Map(GSMap& m, const GSVector4i* r); - void Unmap(); - bool Save(const string& fn, bool user_image = false, bool dds = false); - - operator IDirect3DSurface9*(); - operator IDirect3DTexture9*(); -}; diff --git a/plugins/GSdx_legacy/GSTextureCache.cpp b/plugins/GSdx_legacy/GSTextureCache.cpp deleted file mode 100644 index 4fbf330d23..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache.cpp +++ /dev/null @@ -1,1722 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureCache.h" - -bool s_IS_OPENGL = false; -bool GSTextureCache::m_disable_partial_invalidation = false; - -GSTextureCache::GSTextureCache(GSRenderer* r) - : m_renderer(r) -{ - bool userhacks = !!theApp.GetConfig("UserHacks", 0); - s_IS_OPENGL = (static_cast(theApp.GetConfig("Renderer", static_cast(GSRendererType::Default))) == GSRendererType::OGL_HW); - - m_spritehack = userhacks ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; - UserHacks_HalfPixelOffset = userhacks && theApp.GetConfig("UserHacks_HalfPixelOffset", 0); - - m_paltex = !!theApp.GetConfig("paltex", 0); - m_preload_frame = userhacks && theApp.GetConfig("preload_frame_with_gs_data", 0); - m_can_convert_depth = s_IS_OPENGL && theApp.GetConfig("texture_cache_depth", 1); - m_crc_hack_level = theApp.GetConfig("crc_hack_level", 3); - m_disable_partial_invalidation = userhacks && theApp.GetConfig("UserHacks_DisablePartialInvalidation", 0); - - // In theory 4MB is enough but 9MB is safer for overflow (8MB - // isn't enough in custom resolution) - // Test: onimusha 3 PAL 60Hz - m_temp = (uint8*)_aligned_malloc(9 * 1024 * 1024, 32); -} - -GSTextureCache::~GSTextureCache() -{ - RemoveAll(); - - _aligned_free(m_temp); -} - -void GSTextureCache::RemovePartial() -{ - //m_src.RemoveAll(); - - for (int type = 0; type < 2; type++) - { - for_each(m_dst[type].begin(), m_dst[type].end(), delete_object()); - - m_dst[type].clear(); - } -} - -void GSTextureCache::RemoveAll() -{ - m_src.RemoveAll(); - - for(int type = 0; type < 2; type++) - { - for_each(m_dst[type].begin(), m_dst[type].end(), delete_object()); - - m_dst[type].clear(); - } -} - -GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r) -{ - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? 
GSLocalMemory::m_psm[TEX0.CPSM] : psm; - - // Until DX is fixed - if (s_IS_OPENGL) { - if(psm.pal > 0) - m_renderer->m_mem.m_clut.Read32(TEX0, TEXA); - } else { - GIFRegTEXA plainTEXA; - - plainTEXA.AEM = 1; - plainTEXA.TA0 = 0; - plainTEXA.TA1 = 0x80; - m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA); - } - - const uint32* clut = m_renderer->m_mem.m_clut; - - Source* src = NULL; - - list& m = m_src.m_map[TEX0.TBP0 >> 5]; - - - for(list::iterator i = m.begin(); i != m.end(); i++) - { - Source* s = *i; - - if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH - continue; - - // Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check - if (!s->m_target) { - // We request a palette texture (psm.pal). If the texture was - // converted by the CPU (s->m_palette == NULL), we need to ensure - // palette content is the same. - // Note: content of the palette will be uploaded at the end of the function - if (psm.pal > 0 && s->m_palette == NULL && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0]))) - continue; - - // We request a 24/16 bit RGBA texture. Alpha expansion was done by - // the CPU. We need to check that TEXA is identical - if (psm.pal == 0 && psm.fmt > 0 && s->m_TEXA.u64 != TEXA.u64) - continue; - } - - m.splice(m.begin(), m, i); - - src = s; - - break; - } - - Target* dst = NULL; - bool half_right = false; - -#ifdef DISABLE_HW_TEXTURE_CACHE - if( 0 ) -#else - if(src == NULL) -#endif - { - uint32 bp = TEX0.TBP0; - uint32 psm = TEX0.PSM; - - // Arc the Lad finds the wrong surface here when looking for a depth stencil. - // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. - - // (Simply not doing this code at all makes a lot of previsouly missing stuff show (but breaks pretty much everything - // else.) 
- - for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) - { - Target* t = *i; - - if(t->m_used && t->m_dirty.empty()) { - // Typical bug (MGS3 blue cloud): - // 1/ RT used as 32 bits => alpha channel written - // 2/ RT used as 24 bits => no update of alpha channel - // 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false - // because of the previous draw call format - // - // Solution: consider the RT as 32 bits if the alpha was used in the past - uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; - - if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { - if (!s_IS_OPENGL && (psm == PSM_PSMT8)) { - // OpenGL can convert the texture directly in the GPU. Not sure we want to keep this - // code for DX. It fixes effect but it is slow (MGS3) - - // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will - // be slow but - // 1/ it just works :) - // 2/ even with upscaling - // 3/ for both DX and OpenGL - - // Gregory: to avoid a massive slow down for nothing, let's only enable - // this code when CRC is below the FULL level - if (m_crc_hack_level < 3) - Read(t, t->m_valid); - else - dst = t; - } else { - dst = t; - } - - break; - - } else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) { - // Detect half of the render target (fix snow engine game) - // Target Page (8KB) have always a width of 64 pixels - // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - half_right = true; - dst = t; - - break; - } - - } - } - - if (dst == NULL && CanConvertDepth()) { - // Let's try a trick to avoid to use wrongly a depth buffer - // Unfortunately, I don't have any Arc the Lad testcase - // - // 1/ Check only current frame, I guess it is only used as a postprocessing effect - for(list::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); i++) 
{ - Target* t = *i; - - if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - { - dst = t; - break; - } - } - } - } - - if(src == NULL) - { -#ifdef ENABLE_OGL_DEBUG - if (dst) { - GL_CACHE("TC: dst %s hit (%s): %d (0x%x, F:0x%x)", to_string(dst->m_type), half_right ? "half" : "full", - dst->m_texture ? dst->m_texture->GetID() : 0, - TEX0.TBP0, TEX0.PSM); - } else { - GL_CACHE("TC: src miss (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); - } -#endif - src = CreateSource(TEX0, TEXA, dst, half_right); - - if(src == NULL) - { - return NULL; - } - - } else { - GL_CACHE("TC: src hit: %d (0x%x, F:0x%x)", - src->m_texture ? src->m_texture->GetID() : 0, - TEX0.TBP0, TEX0.PSM); - } - - if (src->m_palette) - { - int size = psm.pal * sizeof(clut[0]); - - if(src->m_initpalette || !GSVector4i::update(src->m_clut, clut, size)) - { - src->m_palette->Update(GSVector4i(0, 0, psm.pal, 1), src->m_clut, size); - src->m_initpalette = false; - } - } - - src->Update(r); - - m_src.m_used = true; - - return src; -} - -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used) -{ - uint32 bp = TEX0.TBP0; - - Target* dst = NULL; - - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) - { - Target* t = *i; - - if(bp == t->m_TEX0.TBP0) - { - m_dst[type].splice(m_dst[type].begin(), m_dst[type], i); - - dst = t; - - dst->m_32_bits_fmt |= !(TEX0.PSM & 2); - dst->m_TEX0 = TEX0; - - break; - } - } - - if (dst) { - GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, F:0x%x)", to_string(type), w, h, dst->m_texture->GetID(), bp, TEX0.PSM); - - dst->Update(); - - dst->m_dirty_alpha |= (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24); - - } else if (CanConvertDepth()) { - - int rev_type = (type == DepthStencil) ? 
RenderTarget : DepthStencil; - GSVector4 sRect(0, 0, 1.0, 1.0); - GSVector4 dRect(0, 0, w, h); - - // Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick - // some bad data. - - for(list::iterator i = m_dst[rev_type].begin(); i != m_dst[rev_type].end(); i++) - { - Target* t = *i; - - if(!t->m_age && bp == t->m_TEX0.TBP0) - { - dst = CreateTarget(TEX0, w, h, type); - dst->m_32_bits_fmt = t->m_32_bits_fmt; - - if (type == DepthStencil) { - GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - int shader = ShaderConvert_RGBA8_TO_FLOAT32 + GSLocalMemory::m_psm[TEX0.PSM].fmt; - m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false); - } else { - GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, ShaderConvert_FLOAT32_TO_RGBA8, false); - } - - break; - } - } - } - - if(dst == NULL) - { - GL_CACHE("TC: Lookup Target(%s) %dx%d, miss (0x%x, F:0x%x)", to_string(type), w, h, bp, TEX0.PSM); - - dst = CreateTarget(TEX0, w, h, type); - - if(dst == NULL) - return NULL; - - // In theory new textures contain invalidated data. Still in theory a new target - // must contains the content of the GS memory. - // In practice, TC will wrongly invalidate some RT. For example due to write on the alpha - // channel but colors is still valid. Unfortunately TC doesn't support the upload of data - // in target. - // - // Cleaning the code here will likely break several games. However it might reduce - // the noise in draw call debugging. It is the main reason to enable it on debug build. - // - // From a performance point of view, it might cost a little on big upscaling - // but normally few RT are miss so it must remain reasonable. 
- if (s_IS_OPENGL) { - if (m_preload_frame) { - GL_INS("Preloading the RT DATA"); - // RT doesn't have height but if we use a too big value, we will read outside of the GS memory. - int page0 = TEX0.TBP0 >> 5; - int max_page = (MAX_PAGES - page0); - int max_h = 32 * max_page / TEX0.TBW; - // h is likely smaller than w (true most of the time). Reduce the upload size (speed) - max_h = std::min(max_h, TEX0.TBW * 64); - - dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM)); - dst->Update(); - } else { -#ifdef ENABLE_OGL_DEBUG - switch (type) { - case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; - case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; - default:break; - } -#endif - } - } - } - - if(m_renderer->CanUpscale()) - { - int multiplier = m_renderer->GetUpscaleMultiplier(); - - if(multiplier > 1) // it's limited to a maximum of 4 on reading the config - { - dst->m_texture->SetScale(GSVector2((float)multiplier, (float)multiplier)); - } - else - { - GSVector4i fr = m_renderer->GetFrameRect(); - - int ww = (int)(fr.left + m_renderer->GetDisplayRect().width()); - int hh = (int)(fr.top + m_renderer->GetDisplayRect().height()); - - if(hh <= m_renderer->GetDeviceSize().y / 2) - { - hh *= 2; - } - - // Gregory: I'm sure this sillyness is related to the usage of a 32bits - // buffer as a 16 bits format. 
In this case the height of the buffer is - // multiplyed by 2 (Hence a scissor bigger than the RT) - - // This vp2 fix doesn't work most of the time - - if(hh < 512 && m_renderer->m_context->SCISSOR.SCAY1 == 511) // vp2 - { - hh = 512; - } - - if(ww > 0 && hh > 0) - { - dst->m_texture->SetScale(GSVector2((float)w / ww, (float)h / hh)); - } - } - } - - if(used) - { - dst->m_used = true; - } - - return dst; -} - -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h) -{ - uint32 bp = TEX0.TBP0; - - Target* dst = NULL; - - for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) - { - Target* t = *i; - - if(bp == t->m_TEX0.TBP0) - { - dst = t; - - GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x)", w, h, dst->m_texture->GetID(), bp); - - break; - } - else - { - // HACK: try to find something close to the base pointer - - if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0xe00UL && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) - { - GL_CACHE("TC: Lookup Frame %dx%d, close hit: %d (0x%x, took 0x%x)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0); - dst = t; - } - } - } - - if(dst == NULL) - { - GL_CACHE("TC: Lookup Frame %dx%d, miss (0x%x)", w, h, bp); - - dst = CreateTarget(TEX0, w, h, RenderTarget); - - if(dst == NULL) - { - return NULL; - } - - m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage - - if (m_preload_frame) { - // Load GS data into frame. Game can directly uploads a background or the full image in - // "CTRC" buffer. It will also avoid various black screen issue in gs dump. 
- // - // Code is more or less an equivalent of the SW renderer - // - // Option is hidden and not enabled by default to avoid any regression - dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, real_h), TEX0.PSM)); - dst->Update(); - } - } - else - { - dst->Update(); - } - - dst->m_used = true; - - return dst; -} - -// Goal: Depth And Target at the same address is not possible. On GS it is -// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target -// must invalidate the Target/Depth respectively -void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) -{ - if (!CanConvertDepth()) - return; - - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) - { - Target* t = *i; - - if(bp == t->m_TEX0.TBP0) - { - GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - - m_dst[type].erase(i); - delete t; - - break; - } - } - -} - -// Goal: invalidate data sent to the GPU when the source (GS memory) is modified -// Called each time you want to write to the GS memory -void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target) -{ - if(!off) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549. 
- - uint32 bp = off->bp; - uint32 bw = off->bw; - uint32 psm = off->psm; - - if(!target) - { - // Remove Source that have same BP as the render target (color&dss) - // rendering will dirty the copy - const list& m = m_src.m_map[bp >> 5]; - - for(list::const_iterator i = m.begin(); i != m.end(); ) - { - list::const_iterator j = i++; - - Source* s = *j; - - if(GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) - { - m_src.RemoveAt(s); - } - } - - uint32 bbp = bp + bw * 0x10; - if (bw >= 16 && bbp < 16384) { - // Detect half of the render target (fix snow engine game) - // Target Page (8KB) have always a width of 64 pixels - // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - - const list& m = m_src.m_map[bbp >> 5]; - - for(list::const_iterator i = m.begin(); i != m.end(); ) - { - list::const_iterator j = i++; - - Source* s = *j; - - if(GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) - { - m_src.RemoveAt(s); - } - } - } - } - - GSVector4i r; - - uint32* pages = (uint32*)m_temp; - - off->GetPages(rect, pages, &r); - - bool found = false; - - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - uint32 page = *p; - - const list& m = m_src.m_map[page]; - - for(list::const_iterator i = m.begin(); i != m.end(); ) - { - list::const_iterator j = i++; - - Source* s = *j; - - if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM)) - { - uint32* RESTRICT valid = s->m_valid; - - bool b = bp == s->m_TEX0.TBP0; - - if(!s->m_target) - { - if (m_disable_partial_invalidation && s->m_repeating) { - m_src.RemoveAt(s); - } else { - // Invalidate data of input texture - if(s->m_repeating) - { - // Note: very hot path on snowbling engine game - vector& l = s->m_p2t[page]; - - for(vector::iterator k = l.begin(); k != l.end(); k++) - { - valid[k->x] &= k->y; - } - } - else - { - valid[page] = 0; - } - - s->m_complete = false; - - found |= b; - } - } - else - { - // render target used as input texture - // TODO - - if(b) - { - m_src.RemoveAt(s); - 
} - } - } - } - } - - if(!target) return; - - for(int type = 0; type < 2; type++) - { - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); ) - { - list::iterator j = i++; - - Target* t = *j; - - // GH: (I think) this code is completely broken. Typical issue: - // EE write an alpha channel into 32 bits texture - // Results: the target is deleted (because HasCompatibleBits is false) - // - // Major issues are expected if the game try to reuse the target - // If we dirty the RT, it will likely upload partially invalid data. - // (The color on the previous example) - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - { - if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) - { - GL_CACHE("TC: Dirty Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - t->m_dirty.push_back(GSDirtyRect(r, psm)); - t->m_TEX0.TBW = bw; - } - else - { - m_dst[type].erase(j); - GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - delete t; - continue; - } - } else if (bp == t->m_TEX0.TBP0) { - // EE writes the ALPHA channel. Mark it as invalid for - // the texture cache. Otherwise it will generate a wrong - // hit on the texture cache. - // Game: Conflict - Desert Storm (flickering) - t->m_dirty_alpha = false; - } - - // GH: Try to detect texture write that will overlap with a target buffer - if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0) - { - uint32 rowsize = bw * 8192; - uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256); - - if(rowsize > 0 && offset % rowsize == 0) - { - int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - - if(r.bottom > y) - { - GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? 
t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - // TODO: do not add this rect above too - t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm)); - t->m_TEX0.TBW = bw; - continue; - } - } - } - - // FIXME: this code "fixes" black FMV issue with rule of rose. - // Code is completely hardcoded so maybe not the best solution. Besides I don't - // know the full impact of it. - // Let's keep this code for the future -#if 0 - if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (t->m_TEX0.TBP0 + 0x200 == bp)) - { - GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - - uint32 rowsize = bw * 8192u; - uint32 offset = 0x200 * 256u; - int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - - t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top + y, r.right, r.bottom + y), psm)); - t->m_TEX0.TBW = bw; - continue; - } -#endif - } - } -} - -// Goal: retrive the data from the GPU to the GS memory. -// Called each time you want to read from the GS memory -void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) -{ - uint32 bp = off->bp; - uint32 psm = off->psm; - //uint32 bw = off->bw; - - // No depth handling please. - if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S) { - GL_INS("ERROR: InvalidateLocalMem depth format isn't supported"); - if (m_can_convert_depth) { - for(auto t : m_dst[DepthStencil]) { - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - // Read the full depth buffer for easy testing - Read(t, t->m_valid); - } - } - } - return; - } - - // This is a shorter but potentially slower version of the below, commented out code. - // It works for all the games mentioned below and fixes a couple of other ones as well - // (Busen0: Wizardry and Chaos Legion). 
- // Also in a few games the below code ran the Grandia3 case when it shouldn't :p - for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) - { - list::iterator j = i++; - - Target* t = *j; - - if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) - { - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - { - // GH Note: Read will do a StretchRect and then will sizzle data to the GS memory - // t->m_valid will do the full target texture whereas r.intersect(t->m_valid) will be limited - // to the useful part for the transfer. - // 1/ Logically intersect must be enough, except if we miss some call to InvalidateLocalMem - // or it need the depth part too - // 2/ Read function is slow but I suspect the swizzle part to be costly. Maybe a compute shader - // that do the swizzle at the same time of the Stretching could save CPU computation. - - // note: r.rintersect breaks Wizardry and Chaos Legion - // Read(t, t->m_valid) works in all tested games but is very slow in GUST titles >< - if (GSTextureCache::m_disable_partial_invalidation) { - Read(t, r.rintersect(t->m_valid)); - } else { - if (r.x == 0 && r.y == 0) // Full screen read? - Read(t, t->m_valid); - else // Block level read? 
- Read(t, r.rintersect(t->m_valid)); - } - } - } else { - GL_INS("ERROR: InvalidateLocalMem target is a depth format"); - } - } - - //GSTextureCache::Target* rt2 = NULL; - //int ymin = INT_MAX; - //for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) - //{ - // list::iterator j = i++; - - // Target* t = *j; - - // if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) - // { - // if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - // { - // if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) - // { - // Read(t, r.rintersect(t->m_valid)); - // return; - // } - // else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) - // { - // // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit - // Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid)); - // return; - // } - // else - // { - // if (psm == PSM_PSMT4HH && t->m_TEX0.PSM == PSM_PSMCT32) - // { - // // Silent Hill Origins shadows: Read 8 bit using only the HIGH bits (4 bit) texture as 32 bit. - // Read(t, r.rintersect(t->m_valid)); - // return; - // } - // else - // { - // //printf("Trashing render target. We have a %d type texture and we are trying to write into a %d type texture\n", t->m_TEX0.PSM, psm); - // m_dst[RenderTarget].erase(j); - // delete t; - // } - // } - // } - - // // Grandia3, FFX, FFX-2 pause menus. 
t->m_TEX0.TBP0 magic number checks because otherwise kills xs2 videos - // if( (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (bp > t->m_TEX0.TBP0) ) - // && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584) )) - // { - // //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0); - // uint32 rowsize = bw * 8192; - // uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256); - - // if(rowsize > 0 && offset % rowsize == 0) - // { - // int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - - // if(y < ymin && y < 512) - // { - // rt2 = t; - // ymin = y; - // } - // } - // } - // } - //} - //if(rt2) - //{ - // Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin)); - //} - - - // TODO: ds -} - -void GSTextureCache::IncAge() -{ - int maxage = m_src.m_used ? 3 : 30; - - // You can't use m_map[page] because Source* are duplicated on several pages. - for(hash_set::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); ) - { - hash_set::iterator j = i++; - - Source* s = *j; - - if(++s->m_age > maxage) - { - m_src.RemoveAt(s); - } - } - - m_src.m_used = false; - - // Clearing of Rendertargets causes flickering in many scene transitions. - // Sigh, this seems to be used to invalidate surfaces. So set a huge maxage to avoid flicker, - // but still invalidate surfaces. (Disgaea 2 fmv when booting the game through the BIOS) - // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. - maxage = 400; // ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it - - for(int type = 0; type < 2; type++) - { - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); ) - { - list::iterator j = i++; - - Target* t = *j; - - // This variable is used to detect the texture shuffle effect. There is a high - // probability that game will do it on the current RT. 
- // Variable is cleared here to avoid issue with game that uses a 16 bits - // render target - if (t->m_age > 0) { - // GoW2 uses the effect at the start of the frame - t->m_32_bits_fmt = false; - } - - if(++t->m_age > maxage) - { - m_dst[type].erase(j); - GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - - delete t; - } - } - } -} - -//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work. -GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right) -{ - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - Source* src = new Source(m_renderer, TEX0, TEXA, m_temp); - - int tw = 1 << TEX0.TW; - int th = 1 << TEX0.TH; - //int tp = TEX0.TBW << 6; - - bool hack = false; - - if(m_spritehack && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)) - { - src->m_spritehack_t = true; - - if(m_spritehack == 2 && TEX0.CPSM != PSM_PSMCT16) - src->m_spritehack_t = false; - } - else - src->m_spritehack_t = false; - - if (dst) - { - // TODO: clean up this mess - - int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY; - bool is_8bits = TEX0.PSM == PSM_PSMT8 && s_IS_OPENGL; - - if (is_8bits) { - GL_INS("Reading RT as a packed-indexed 8 bits format"); - shader = ShaderConvert_RGBA_TO_8I; - } - -#ifdef ENABLE_OGL_DEBUG - if (TEX0.PSM == PSM_PSMT4) { - GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported"); - } -#endif - - if (TEX0.PSM < PSM_PSMT8 || TEX0.PSM > PSM_PSMT4HH) { - src->m_32_bits_fmt = dst->m_32_bits_fmt; - } - src->m_target = true; - - dst->Update(); - - GSTexture* tmp = NULL; - - if (dst->m_texture->IsMSAA()) - { - tmp = dst->m_texture; - - dst->m_texture = m_renderer->m_dev->Resolve(dst->m_texture); - } - - - // do not round here!!! 
if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) - - int w = (int)(dst->m_texture->GetScale().x * tw); - int h = (int)(dst->m_texture->GetScale().y * th); - if (is_8bits) { - // Unscale 8 bits textures, quality won't be nice but format is really awful - w = tw; - h = th; - } - - GSVector2i dstsize = dst->m_texture->GetSize(); - - // pitch conversion - - if(dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM - { - // This is so broken :p - ////Better not do the code below, "fixes" like every game that ever gets here.. - ////Edit: Ratchet and Clank needs this to show most of it's graphics at all. - ////Someone else fix this please, I can't :p - ////delete src; return NULL; - - //// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) - - //ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) - - //src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y, false); - - //GSVector4 size = GSVector4(dstsize).xyxy(); - //GSVector4 scale = GSVector4(dst->m_texture->GetScale()).xyxy(); - - //int blockWidth = 64; - //int blockHeight = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 
32 : 64; - - //GSVector4i br(0, 0, blockWidth, blockHeight); - - //int sw = (int)dst->m_TEX0.TBW << 6; - - //int dw = (int)TEX0.TBW << 6; - //int dh = 1 << TEX0.TH; - - //if(sw != 0) - //for(int dy = 0; dy < dh; dy += blockHeight) - //{ - // for(int dx = 0; dx < dw; dx += blockWidth) - // { - // int off = dy * dw / blockHeight + dx; - - // int sx = off % sw; - // int sy = off / sw; - - // GSVector4 sRect = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; - // GSVector4 dRect = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; - - // m_renderer->m_dev->StretchRect(dst->m_texture, sRect, src->m_texture, dRect); - - // // TODO: this is quite a lot of StretchRect, do it with one Draw - // } - //} - } - else if(tw < 1024) - { - // FIXME: timesplitters blurs the render target by blending itself over a couple of times - hack = true; - //if(tw == 256 && th == 128 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00)) - //{ - // delete src; - // return NULL; - //} - } - // width/height conversion - - GSVector2 scale = dst->m_texture->GetScale(); - - GSVector4 dRect(0, 0, w, h); - - // Lengthy explanation of the rescaling code. - // Here an example in 2x: - // RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling) - // - // PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels) - // So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is - // too small. - // - // So we will only limit the resize to the available data in RT. - // Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture - // size (which is 1280x1024) (i.e. 800x512) - // From the rendering point of view. UV coordinate will be normalized on the real GS texture size - // This way it can be used on an upscaled texture without extra scaling factor (only requirement is - // to have same proportion) - // - // FIXME: The scaling will create a bad offset. 
For example if texture coordinate start at 0.5 (pixel 0) - // At 2x it will become 0.5/128 * 256 = 1 (pixel 1) - // I think it is the purpose of the UserHacks_HalfPixelOffset below. However implementation is less - // than ideal. - // 1/ It suppose games have an half pixel offset on texture coordinate which could be wrong - // 2/ It doesn't support rescaling of the RT (tw = 1024) - // Maybe it will be more easy to just round the UV value in the Vertex Shader - - if (!is_8bits) { - // 8 bits handling is special due to unscaling. It is better to not execute this code - if (w > dstsize.x) - { - scale.x = (float)dstsize.x / tw; - dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; - w = dstsize.x; - } - - if (h > dstsize.y) - { - scale.y = (float)dstsize.y / th; - dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; - h = dstsize.y; - } - } - - GSVector4 sRect(0, 0, w, h); - - GSTexture* sTex = src->m_texture ? src->m_texture : dst->m_texture; - GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h, false); - - // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format - // However it is different here. We want to reuse a Render Target as a texture. - // Because the texture is already on the GPU, CPU can't convert it. - if (psm.pal > 0) { - src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); - } - // Disable linear filtering for various GS post-processing effect - // 1/ Palette is used to interpret the alpha channel of the RT as an index. - // Star Ocean 3 uses it to emulate a stencil buffer. - // 2/ Z formats are a bad idea to interpolate (discontinuties). - // 3/ 16 bits buffer is used to move data from a channel to another. - // - // I keep linear filtering for standard color even if I'm not sure that it is - // working correctly. - // Indeed, texture is reduced so you need to read all covered pixels (9 in 3x) - // to correctly interpolate the value. 
Linear interpolation is likely acceptable - // only in 2x scaling - // - // Src texture will still be bilinear interpolated so I'm really not sure - // that we need to do it here too. - // - // Future note: instead to do - // RT 2048x2048 -> T 1024x1024 -> RT 2048x2048 - // We can maybe sample directly a bigger texture - // RT 2048x2048 -> T 2048x2048 -> RT 2048x2048 - // Pro: better quality. Copy instead of StretchRect (must be faster) - // Cons: consume more memory - // - // In distant future: investigate to reuse the RT directly without any - // copy. Likely a speed boost and memory usage reduction. - bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24); - - if(!src->m_texture) - { - src->m_texture = dTex; - } - - if ((sRect == dRect).alltrue() && !shader) - { - if (half_right) { - // You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT - // which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture - // so the only reliable way to find the real size of the target is to use the TBW value. 
- float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x; - m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(real_width/2.0f, 0, real_width, h)); - } else { - m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w - } - } - else - { - // Different size or not the same format - sRect.z /= sTex->GetWidth(); - sRect.w /= sTex->GetHeight(); - - if (half_right) { - sRect.x = sRect.z/2.0f; - } - - m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, shader, linear); - } - - if(dTex != src->m_texture) - { - m_renderer->m_dev->Recycle(src->m_texture); - - src->m_texture = dTex; - } - - if( src->m_texture ) - src->m_texture->SetScale(scale); - else - ASSERT(0); - - if(tmp != NULL) - { - // tmp is the texture before a MultiSample resolve - m_renderer->m_dev->Recycle(dst->m_texture); - - dst->m_texture = tmp; - } - - // Offset hack. Can be enabled via GSdx options. - // The offset will be used in Draw(). - - float modx = 0.0f; - float mody = 0.0f; - - if(UserHacks_HalfPixelOffset && hack) - { - switch(m_renderer->GetUpscaleMultiplier()) - { - case 2: modx = 2.2f; mody = 2.2f; dst->m_texture->LikelyOffset = true; break; - case 3: modx = 3.1f; mody = 3.1f; dst->m_texture->LikelyOffset = true; break; - case 4: modx = 4.2f; mody = 4.2f; dst->m_texture->LikelyOffset = true; break; - case 5: modx = 5.3f; mody = 5.3f; dst->m_texture->LikelyOffset = true; break; - case 6: modx = 6.2f; mody = 6.2f; dst->m_texture->LikelyOffset = true; break; - case 8: modx = 8.2f; mody = 8.2f; dst->m_texture->LikelyOffset = true; break; - default: modx = 0.0f; mody = 0.0f; dst->m_texture->LikelyOffset = false; break; - } - } - - dst->m_texture->OffsetHack_modx = modx; - dst->m_texture->OffsetHack_mody = mody; - } - else - { - if (m_paltex && psm.pal > 0) - { - src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat()); - src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); - } - else - 
src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); - } - - if(src->m_texture == NULL) - { - ASSERT(0); - delete src; - return NULL; - } - - if(psm.pal > 0) - { - memcpy(src->m_clut, (const uint32*)m_renderer->m_mem.m_clut, psm.pal * sizeof(uint32)); - } - - m_src.Add(src, TEX0, m_renderer->m_context->offset.tex); - - return src; -} - -GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type) -{ - Target* t = new Target(m_renderer, TEX0, m_temp, CanConvertDepth()); - - // FIXME: initial data should be unswizzled from local mem in Update() if dirty - - t->m_type = type; - - if(type == RenderTarget) - { - t->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h, true); - - t->m_used = true; // FIXME - } - else if(type == DepthStencil) - { - t->m_texture = m_renderer->m_dev->CreateDepthStencil(w, h, true); - } - - if(t->m_texture == NULL) - { - ASSERT(0); - delete t; - return NULL; - } - - m_dst[type].push_front(t); - - return t; -} - -void GSTextureCache::PrintMemoryUsage() -{ -#ifdef ENABLE_OGL_DEBUG - uint32 tex = 0; - uint32 tex_rt = 0; - uint32 rt = 0; - uint32 dss = 0; - for(hash_set::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); i++) { - Source* s = *i; - if (s) { - if (s->m_target) - tex_rt += s->m_texture->GetMemUsage(); - else - tex += s->m_texture->GetMemUsage(); - - } - } - for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) { - Target* t = *i; - if (t) - rt += t->m_texture->GetMemUsage(); - } - for(list::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); i++) { - Target* t = *i; - if (t) - dss += t->m_texture->GetMemUsage(); - } - - GL_PERF("MEM: RO Tex %dMB. RW Tex %dMB. Target %dMB. 
Depth %dMB", tex >> 20u, tex_rt >> 20u, rt >> 20u, dss >> 20u); -#endif -} - -// GSTextureCache::Surface - -GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp) - : m_renderer(r) - , m_texture(NULL) - , m_age(0) - , m_temp(temp) - , m_32_bits_fmt(false) -{ - m_TEX0.TBP0 = 0x3fff; -} - -GSTextureCache::Surface::~Surface() -{ - m_renderer->m_dev->Recycle(m_texture); -} - -void GSTextureCache::Surface::Update() -{ - m_age = 0; -} - -// GSTextureCache::Source - -GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp) - : Surface(r, temp) - , m_palette(NULL) - , m_initpalette(true) - , m_target(false) - , m_complete(false) - , m_spritehack_t(false) - , m_p2t(NULL) -{ - m_TEX0 = TEX0; - m_TEXA = TEXA; - - memset(m_valid, 0, sizeof(m_valid)); - - m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32); - - memset(m_clut, 0, 256*sizeof(uint32)); - - m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32); - m_write.count = 0; - - m_repeating = m_TEX0.IsRepeating(); - - if(m_repeating) - { - m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); - } -} - -GSTextureCache::Source::~Source() -{ - m_renderer->m_dev->Recycle(m_palette); - - _aligned_free(m_clut); - - _aligned_free(m_write.rect); -} - -void GSTextureCache::Source::Update(const GSVector4i& rect) -{ - Surface::Update(); - - if(m_complete || m_target) - { - return; - } - - GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; - - int tw = std::max(1 << m_TEX0.TW, bs.x); - int th = std::max(1 << m_TEX0.TH, bs.y); - - GSVector4i r = rect.ralign(bs); - - if(r.eq(GSVector4i(0, 0, tw, th))) - { - m_complete = true; // lame, but better than nothing - } - - const GSOffset* off = m_renderer->m_context->offset.tex; - - uint32 blocks = 0; - - if(m_repeating) - { - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = off->block.row[y >> 3]; - - for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) - { - uint32 block = base + 
off->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - uint32 addr = i >> 3; - - uint32 row = addr >> 5; - uint32 col = 1 << (addr & 31); - - if((m_valid[row] & col) == 0) - { - m_valid[row] |= col; - - Write(GSVector4i(x, y, x + bs.x, y + bs.y)); - - blocks++; - } - } - } - } - } - else - { - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = off->block.row[y >> 3]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 block = base + off->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); - - if((m_valid[row] & col) == 0) - { - m_valid[row] |= col; - - Write(GSVector4i(x, y, x + bs.x, y + bs.y)); - - blocks++; - } - } - } - } - } - - if(blocks > 0) - { - m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 2 : 0)); - - Flush(m_write.count); - } -} - -void GSTextureCache::Source::Write(const GSVector4i& r) -{ - m_write.rect[m_write.count++] = r; - - while(m_write.count >= 2) - { - GSVector4i& a = m_write.rect[m_write.count - 2]; - GSVector4i& b = m_write.rect[m_write.count - 1]; - - if((a == b.zyxw()).mask() == 0xfff0) - { - a.right = b.right; // extend right - - m_write.count--; - } - else if((a == b.xwzy()).mask() == 0xff0f) - { - a.bottom = b.bottom; // extend down - - m_write.count--; - } - else - { - break; - } - } - - if(m_write.count > 2) - { - Flush(1); - } -} - -void GSTextureCache::Source::Flush(uint32 count) -{ - // This function as written will not work for paletted formats copied from framebuffers - // because they are 8 or 4 bit formats on the GS and the GS local memory module reads - // these into an 8 bit format while the D3D surfaces are 32 bit. - // However the function is never called for these cases. This is just for information - // should someone wish to use this function for these cases later. 
- const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; - - int tw = 1 << m_TEX0.TW; - int th = 1 << m_TEX0.TH; - - GSVector4i tr(0, 0, tw, th); - - int pitch = max(tw, psm.bs.x) * sizeof(uint32); - - GSLocalMemory& mem = m_renderer->m_mem; - - const GSOffset* off = m_renderer->m_context->offset.tex; - - GSLocalMemory::readTexture rtx = psm.rtx; - - GIFRegTEXA plainTEXA; - - // Until DX is fixed - if (s_IS_OPENGL) { - plainTEXA = m_TEXA; - } else { - plainTEXA.AEM = 1; - plainTEXA.TA0 = 0; - plainTEXA.TA1 = 0x80; - } - - if(m_palette) - { - pitch >>= 2; - rtx = psm.rtxP; - } - - uint8* buff = m_temp; - - for(uint32 i = 0; i < count; i++) - { - GSVector4i r = m_write.rect[i]; - - if((r > tr).mask() & 0xff00) - { - (mem.*rtx)(off, r, buff, pitch, m_TEXA); - - m_texture->Update(r.rintersect(tr), buff, pitch); - } - else - { - GSTexture::GSMap m; - - if(m_texture->Map(m, &r)) - { - (mem.*rtx)(off, r, m.bits, m.pitch, plainTEXA); - - m_texture->Unmap(); - } - else - { - (mem.*rtx)(off, r, buff, pitch, plainTEXA); - - m_texture->Update(r, buff, pitch); - } - } - } - - if(count < m_write.count) - { - // Warning src and destination overlap. 
Memmove must be used instead of memcpy - memmove(&m_write.rect[0], &m_write.rect[count], (m_write.count - count) * sizeof(m_write.rect[0])); - } - - m_write.count -= count; -} - -// GSTextureCache::Target - -GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported) - : Surface(r, temp) - , m_type(-1) - , m_used(false) - , m_depth_supported(depth_supported) -{ - m_TEX0 = TEX0; - m_32_bits_fmt |= !(TEX0.PSM & 2); - m_dirty_alpha = (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24); - - m_valid = GSVector4i::zero(); -} - -void GSTextureCache::Target::Update() -{ - Surface::Update(); - - // FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :) - // GH: it must be doable - // 1/ rescale the new t to the good size - // 2/ copy each rectangle (rescale the rectangle) (use CopyRect or multiple vertex) - // Alternate - // 1/ uses multiple vertex rectangle - - GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, m_texture->GetSize()); - - if (r.rempty()) return; - - // No handling please - if ((m_type == DepthStencil) && !m_depth_supported) { - // do the most likely thing a direct write would do, clear it - GL_INS("ERROR: Update DepthStencil dummy"); - - if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0) - m_renderer->m_dev->ClearDepth(m_texture, 0); - - return; - } - - int w = r.width(); - int h = r.height(); - - GIFRegTEXA TEXA; - - TEXA.AEM = 1; - TEXA.TA0 = 0; - TEXA.TA1 = 0x80; - - GSTexture* t = m_renderer->m_dev->CreateTexture(w, h); - if (t == NULL) return; - - const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); - - GSTexture::GSMap m; - - if(t->Map(m)) - { - m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA); - - t->Unmap(); - } - else - { - int pitch = ((w + 3) & ~3) * 4; - - m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA); - - t->Update(r.rsize(), m_temp, pitch); - } - - // 
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); - - // Copy the new GS memory content into the destination texture. - if(m_type == RenderTarget) - { - GL_INS("ERROR: Update RenderTarget"); - - m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); - } - else if(m_type == DepthStencil) - { - GL_INS("ERROR: Update DepthStencil"); - - // FIXME linear or not? - m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert_RGBA8_TO_FLOAT32); - } - - m_renderer->m_dev->Recycle(t); -} - -// GSTextureCache::SourceMap - -void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset* off) -{ - m_surfaces.insert(s); - - if(s->m_target) - { - // TODO - - // GH: I don't know why but it seems we only consider the first page for a render target - - m_map[TEX0.TBP0 >> 5].push_front(s); - - return; - } - - // Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages) - // (Maybe GetPages could be used instead, perf opt?) - // The source pointer will be stored/duplicated in all m_map[array of pages] - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - - GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? 
psm.pgs : psm.bs; - - int tw = 1 << TEX0.TW; - int th = 1 << TEX0.TH; - - for(int y = 0; y < th; y += bs.y) - { - uint32 base = off->block.row[y >> 3]; - - for(int x = 0; x < tw; x += bs.x) - { - uint32 page = (base + off->block.col[x >> 3]) >> 5; - - if(page < MAX_PAGES) - { - m_pages[page >> 5] |= 1 << (page & 31); - } - } - } - - for(size_t i = 0; i < countof(m_pages); i++) - { - if(uint32 p = m_pages[i]) - { - m_pages[i] = 0; - - list* m = &m_map[i << 5]; - - unsigned long j; - - while(_BitScanForward(&j, p)) - { - p ^= 1 << j; - - m[j].push_front(s); - } - } - } -} - -void GSTextureCache::SourceMap::RemoveAll() -{ - for_each(m_surfaces.begin(), m_surfaces.end(), delete_object()); - - m_surfaces.clear(); - - for(size_t i = 0; i < countof(m_map); i++) - { - m_map[i].clear(); - } -} - -void GSTextureCache::SourceMap::RemoveAt(Source* s) -{ - m_surfaces.erase(s); - - GL_CACHE("TC: Remove Src Texture: %d (0x%x)", - s->m_texture ? s->m_texture->GetID() : 0, - s->m_TEX0.TBP0); - - // Source (except render target) is duplicated for each page they use. - for(size_t start = s->m_TEX0.TBP0 >> 5, end = s->m_target ? start : countof(m_map) - 1; start <= end; start++) - { - list& m = m_map[start]; - - for(list::iterator i = m.begin(); i != m.end(); ) - { - list::iterator j = i++; - - if(*j == s) {m.erase(j); break;} - } - } - - delete s; -} diff --git a/plugins/GSdx_legacy/GSTextureCache.h b/plugins/GSdx_legacy/GSTextureCache.h deleted file mode 100644 index dbd9a430eb..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" -#include "GSDirtyRect.h" - -class GSTextureCache -{ -public: - enum {RenderTarget, DepthStencil}; - - class Surface : public GSAlignedClass<32> - { - protected: - GSRenderer* m_renderer; - - public: - GSTexture* m_texture; - GIFRegTEX0 m_TEX0; - GIFRegTEXA m_TEXA; - int m_age; - uint8* m_temp; - bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture - - public: - Surface(GSRenderer* r, uint8* temp); - virtual ~Surface(); - - virtual void Update(); - }; - - class Source : public Surface - { - struct {GSVector4i* rect; uint32 count;} m_write; - - void Write(const GSVector4i& r); - void Flush(uint32 count); - - public: - GSTexture* m_palette; - bool m_initpalette; - uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page - uint32* m_clut; - bool m_target; - bool m_complete; - bool m_repeating; - bool m_spritehack_t; - vector* m_p2t; - - public: - Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp); - virtual ~Source(); - - virtual void Update(const GSVector4i& rect); - }; - - class Target : public Surface - { - public: - int m_type; - bool m_used; - GSDirtyRectList m_dirty; - GSVector4i m_valid; - bool m_depth_supported; - bool m_dirty_alpha; - - public: - Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported); - - virtual void Update(); - }; - - class SourceMap - { - public: - 
hash_set m_surfaces; - list m_map[MAX_PAGES]; - uint32 m_pages[16]; // bitmap of all pages - bool m_used; - - SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));} - - void Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset* off); - void RemoveAll(); - void RemovePartial(); - void RemoveAt(Source* s); - }; - -protected: - GSRenderer* m_renderer; - SourceMap m_src; - list m_dst[2]; - bool m_paltex; - int m_spritehack; - bool m_preload_frame; - uint8* m_temp; - bool m_can_convert_depth; - int m_crc_hack_level; - static bool m_disable_partial_invalidation; - - virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false); - virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type); - - virtual int Get8bitFormat() = 0; - - // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; - // TODO: virtual void Write(Target* t, const GSVector4i& r) = 0; -#ifndef DISABLE_HW_TEXTURE_CACHE - virtual void Read(Target* t, const GSVector4i& r) = 0; -#endif - - virtual bool CanConvertDepth() { return m_can_convert_depth; } - -public: - GSTextureCache(GSRenderer* r); - virtual ~GSTextureCache(); -#ifdef DISABLE_HW_TEXTURE_CACHE - virtual void Read(Target* t, const GSVector4i& r) = 0; -#endif - void RemoveAll(); - void RemovePartial(); - - Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); - Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); - Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h); - - void InvalidateVideoMemType(int type, uint32 bp); - void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true); - void InvalidateLocalMem(GSOffset* off, const GSVector4i& r); - - void IncAge(); - bool UserHacks_HalfPixelOffset; - - const char* to_string(int type) { - return (type == DepthStencil) ? 
"Depth" : "Color"; - } - - void PrintMemoryUsage(); -}; diff --git a/plugins/GSdx_legacy/GSTextureCache11.cpp b/plugins/GSdx_legacy/GSTextureCache11.cpp deleted file mode 100644 index 72c4251935..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache11.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "StdAfx.h" -#include "GSTextureCache11.h" - -// GSTextureCache11 - -GSTextureCache11::GSTextureCache11(GSRenderer* r) - : GSTextureCache(r) -{ -} - -void GSTextureCache11::Read(Target* t, const GSVector4i& r) -{ - if(t->m_type != RenderTarget) - { - // TODO - - return; - } - - const GIFRegTEX0& TEX0 = t->m_TEX0; - - if(TEX0.PSM != PSM_PSMCT32 - && TEX0.PSM != PSM_PSMCT24 - && TEX0.PSM != PSM_PSMCT16 - && TEX0.PSM != PSM_PSMCT16S) - { - //ASSERT(0); - - return; - } - - if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0)) - { - return; - } - - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); - - int w = r.width(); - int h = r.height(); - - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - - DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; - - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) - { - GSTexture::GSMap m; - - if(offscreen->Map(m)) - { - // TODO: block level write - - GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - switch(TEX0.PSM) - { - case PSM_PSMCT32: - m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT24: - m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); - break; - default: - ASSERT(0); - } - - offscreen->Unmap(); - } - - m_renderer->m_dev->Recycle(offscreen); - } -} - diff --git a/plugins/GSdx_legacy/GSTextureCache11.h b/plugins/GSdx_legacy/GSTextureCache11.h deleted file mode 100644 index d110dbe156..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache11.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * 
This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTextureCache.h" -#include "GSDevice11.h" - -class GSTextureCache11 : public GSTextureCache -{ -protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} - - void Read(Target* t, const GSVector4i& r); - - virtual bool CanConvertDepth() { return false; } - -public: - GSTextureCache11(GSRenderer* r); -}; diff --git a/plugins/GSdx_legacy/GSTextureCache9.cpp b/plugins/GSdx_legacy/GSTextureCache9.cpp deleted file mode 100644 index 1820efc2ad..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache9.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "StdAfx.h" -#include "GSTextureCache9.h" - -// GSTextureCache9 - -GSTextureCache9::GSTextureCache9(GSRenderer* r) - : GSTextureCache(r) -{ -} - -void GSTextureCache9::Read(Target* t, const GSVector4i& r) -{ - if(t->m_type != RenderTarget) - { - // TODO - - return; - } - - const GIFRegTEX0& TEX0 = t->m_TEX0; - - if(TEX0.PSM != PSM_PSMCT32 - && TEX0.PSM != PSM_PSMCT24 - && TEX0.PSM != PSM_PSMCT16 - && TEX0.PSM != PSM_PSMCT16S) - { - //ASSERT(0); - - return; - } - - if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0)) - { - return; - } - - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); - - int w = r.width(); - int h = r.height(); - - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h)) - { - GSTexture::GSMap m; - - if(offscreen->Map(m)) - { - // TODO: block level write - - GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - switch(TEX0.PSM) - { - case PSM_PSMCT32: - m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT24: - m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, off, r); - break; - default: - ASSERT(0); - } - - offscreen->Unmap(); - } - - m_renderer->m_dev->Recycle(offscreen); - } -} - diff --git a/plugins/GSdx_legacy/GSTextureCache9.h b/plugins/GSdx_legacy/GSTextureCache9.h deleted file mode 100644 index 1fbf701860..0000000000 --- a/plugins/GSdx_legacy/GSTextureCache9.h +++ 
/dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTextureCache.h" -#include "GSDevice9.h" - -class GSTextureCache9 : public GSTextureCache -{ -protected: - int Get8bitFormat() {return D3DFMT_A8;} - - void Read(Target* t, const GSVector4i& r); - - virtual bool CanConvertDepth() { return false; } - -public: - GSTextureCache9(GSRenderer* r); -}; diff --git a/plugins/GSdx_legacy/GSTextureCacheOGL.cpp b/plugins/GSdx_legacy/GSTextureCacheOGL.cpp deleted file mode 100644 index ee89c6966d..0000000000 --- a/plugins/GSdx_legacy/GSTextureCacheOGL.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureCacheOGL.h" - -GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r) - : GSTextureCache(r) -{ -} - -void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) -{ - if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0)) - return; - - const GIFRegTEX0& TEX0 = t->m_TEX0; - - GLuint fmt; - int ps_shader; - switch (TEX0.PSM) - { - case PSM_PSMCT32: - case PSM_PSMCT24: - fmt = GL_RGBA8; - ps_shader = 0; - break; - - case PSM_PSMCT16: - case PSM_PSMCT16S: - fmt = GL_R16UI; - ps_shader = 1; - break; - - case PSM_PSMZ32: - fmt = GL_R32UI; - ps_shader = 10; - break; - - case PSM_PSMZ24: - fmt = GL_R32UI; - ps_shader = 10; - break; - - case PSM_PSMZ16: - case PSM_PSMZ16S: - fmt = GL_R16UI; - ps_shader = 10; - break; - - default: - return; - } - - - // Yes lots of logging, but I'm not confident with this code - GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM); - - GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. 
Size %dx%d", - t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height()); - - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader)) - { - GSTexture::GSMap m; - GSVector4i r_offscreen(0, 0, r.width(), r.height()); - - if(offscreen->Map(m, &r_offscreen)) - { - // TODO: block level write - - GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - switch(TEX0.PSM) - { - case PSM_PSMCT32: - m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT24: - m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); - break; - - case PSM_PSMZ32: - m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); - break; - case PSM_PSMZ24: - m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r); - break; - case PSM_PSMZ16: - case PSM_PSMZ16S: - m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r); - break; - - default: - ASSERT(0); - } - - offscreen->Unmap(); - } - - // FIXME invalidate data - m_renderer->m_dev->Recycle(offscreen); - } - - GL_POP(); -} - diff --git a/plugins/GSdx_legacy/GSTextureCacheOGL.h b/plugins/GSdx_legacy/GSTextureCacheOGL.h deleted file mode 100644 index 840826a39b..0000000000 --- a/plugins/GSdx_legacy/GSTextureCacheOGL.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTextureCache.h" -#include "GSDeviceOGL.h" - -class GSTextureCacheOGL final : public GSTextureCache -{ -protected: - int Get8bitFormat() { return GL_R8;} - - void Read(Target* t, const GSVector4i& r); - -public: - GSTextureCacheOGL(GSRenderer* r); -}; diff --git a/plugins/GSdx_legacy/GSTextureCacheSW.cpp b/plugins/GSdx_legacy/GSTextureCacheSW.cpp deleted file mode 100644 index 8d1a2ba6f3..0000000000 --- a/plugins/GSdx_legacy/GSTextureCacheSW.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureCacheSW.h" - -GSTextureCacheSW::GSTextureCacheSW(GSState* state) - : m_state(state) -{ -} - -GSTextureCacheSW::~GSTextureCacheSW() -{ - RemoveAll(); -} - -GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0) -{ - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - - Texture* t = NULL; - - list& m = m_map[TEX0.TBP0 >> 5]; - - for(list::iterator i = m.begin(); i != m.end(); i++) - { - Texture* t2 = *i; - - if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH - { - continue; - } - - if((psm.trbpp == 16 || psm.trbpp == 24) && TEX0.TCC && TEXA != t2->m_TEXA) - { - continue; - } - - if(tw0 != 0 && t2->m_tw != tw0) - { - continue; - } - - m.splice(m.begin(), m, i); - - t = t2; - - t->m_age = 0; - - break; - } - - if(t == NULL) - { - t = new Texture(m_state, tw0, TEX0, TEXA); - - m_textures.insert(t); - - for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) - { - m_map[*p].push_front(t); - } - } - - return t; -} - -void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm) -{ - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - uint32 page = *p; - - const list& map = m_map[page]; - - for(list::const_iterator i = map.begin(); i != map.end(); i++) - { - Texture* t = *i; - - if(GSUtil::HasSharedBits(psm, t->m_sharedbits)) - { - uint32* RESTRICT valid = t->m_valid; - - if(t->m_repeating) - { - vector& l = t->m_p2t[page]; - - for(vector::iterator j = l.begin(); j != l.end(); j++) - { - valid[j->x] &= j->y; - } - } - else - { - valid[page] = 0; - } - - t->m_complete = false; - } - } - } -} - -void GSTextureCacheSW::RemoveAll() -{ - for_each(m_textures.begin(), m_textures.end(), delete_object()); - - m_textures.clear(); - - for(int i = 0; i < MAX_PAGES; i++) - { - m_map[i].clear(); - } -} - -void GSTextureCacheSW::IncAge() -{ - 
for(hash_set::iterator i = m_textures.begin(); i != m_textures.end(); ) - { - hash_set::iterator j = i++; - - Texture* t = *j; - - if(++t->m_age > 10) - { - m_textures.erase(j); - - for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) - { - list& m = m_map[*p]; - - for(list::iterator i = m.begin(); i != m.end(); ) - { - list::iterator j = i++; - - if(*j == t) {m.erase(j); break;} - } - } - - delete t; - } - } -} - -// - -GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) - : m_state(state) - , m_buff(NULL) - , m_tw(tw0) - , m_age(0) - , m_complete(false) - , m_p2t(NULL) -{ - m_TEX0 = TEX0; - m_TEXA = TEXA; - - if(m_tw == 0) - { - m_tw = std::max(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff - } - - memset(m_valid, 0, sizeof(m_valid)); - memset(m_pages.bm, 0, sizeof(m_pages.bm)); - - m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM); - - m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - - m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); - - for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++) - { - uint32 page = *p; - - m_pages.bm[page >> 5] |= 1 << (page & 31); - } - - m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower - - if(m_repeating) - { - m_p2t = m_state->m_mem.GetPage2TileMap(m_TEX0); - } -} - -GSTextureCacheSW::Texture::~Texture() -{ - delete [] m_pages.n; - - if(m_buff) - { - _aligned_free(m_buff); - } -} - -bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) -{ - if(m_complete) - { - return true; - } - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; - - GSVector2i bs = psm.bs; - - int shift = psm.pal == 0 ? 
2 : 0; - - int tw = std::max(1 << m_TEX0.TW, bs.x); - int th = std::max(1 << m_TEX0.TH, bs.y); - - GSVector4i r = rect; - - r = r.ralign(bs); - - if(r.eq(GSVector4i(0, 0, tw, th))) - { - m_complete = true; // lame, but better than nothing - } - - if(m_buff == NULL) - { - uint32 pitch = (1 << m_tw) << shift; - - m_buff = _aligned_malloc(pitch * th * 4, 32); - - if(m_buff == NULL) - { - return false; - } - } - - GSLocalMemory& mem = m_state->m_mem; - - const GSOffset* RESTRICT off = m_offset; - - uint32 blocks = 0; - - GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP; - - uint32 pitch = (1 << m_tw) << shift; - - uint8* dst = (uint8*)m_buff + pitch * r.top; - - int block_pitch = pitch * bs.y; - - r = r.srl32(3); - - bs.x >>= 3; - bs.y >>= 3; - - shift += 3; - - if(m_repeating) - { - for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) - { - uint32 base = off->block.row[y]; - - for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) - { - uint32 block = base + off->block.col[x]; - - if(block < MAX_BLOCKS) - { - uint32 row = i >> 5; - uint32 col = 1 << (i & 31); - - if((m_valid[row] & col) == 0) - { - m_valid[row] |= col; - - (mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); - - blocks++; - } - } - } - } - } - else - { - for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) - { - uint32 base = off->block.row[y]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 block = base + off->block.col[x]; - - if(block < MAX_BLOCKS) - { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); - - if((m_valid[row] & col) == 0) - { - m_valid[row] |= col; - - (mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); - - blocks++; - } - } - } - } - } - - if(blocks > 0) - { - m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift); - } - - return true; -} - -#include "GSTextureSW.h" - -bool GSTextureCacheSW::Texture::Save(const string& fn, bool dds) const -{ - const uint32* RESTRICT clut = m_state->m_mem.m_clut; - 
- int w = 1 << m_TEX0.TW; - int h = 1 << m_TEX0.TH; - - GSTextureSW t(0, w, h); - - GSTexture::GSMap m; - - if(t.Map(m, NULL)) - { - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; - - const uint8* RESTRICT src = (uint8*)m_buff; - int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0)); - - for(int j = 0; j < h; j++, src += pitch, m.bits += m.pitch) - { - if(psm.pal == 0) - { - memcpy(m.bits, src, sizeof(uint32) * w); - } - else - { - for(int i = 0; i < w; i++) - { - ((uint32*)m.bits)[i] = clut[src[i]]; - } - } - } - - t.Unmap(); - - return t.Save(fn.c_str()); - } - - return false; -} \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSTextureCacheSW.h b/plugins/GSdx_legacy/GSTextureCacheSW.h deleted file mode 100644 index ed59acb35d..0000000000 --- a/plugins/GSdx_legacy/GSTextureCacheSW.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSRenderer.h" - -class GSTextureCacheSW -{ -public: - class Texture - { - public: - GSState* m_state; - GSOffset* m_offset; - GIFRegTEX0 m_TEX0; - GIFRegTEXA m_TEXA; - void* m_buff; - uint32 m_tw; - uint32 m_age; - bool m_complete; - bool m_repeating; - vector* m_p2t; - uint32 m_valid[MAX_PAGES]; - struct {uint32 bm[16]; const uint32* n;} m_pages; - const uint32* RESTRICT m_sharedbits; - - // m_valid - // fast mode: each uint32 bits map to the 32 blocks of that page - // repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8)) - - Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); - virtual ~Texture(); - - bool Update(const GSVector4i& r); - bool Save(const string& fn, bool dds = false) const; - }; - -protected: - GSState* m_state; - hash_set m_textures; - list m_map[MAX_PAGES]; - -public: - GSTextureCacheSW(GSState* state); - virtual ~GSTextureCacheSW(); - - Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0); - - void InvalidatePages(const uint32* pages, uint32 psm); - - void RemoveAll(); - void IncAge(); -}; diff --git a/plugins/GSdx_legacy/GSTextureFX.cpp b/plugins/GSdx_legacy/GSTextureFX.cpp deleted file mode 100644 index 4deab34da2..0000000000 --- a/plugins/GSdx_legacy/GSTextureFX.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureFX.h" - -GSTextureFX::GSTextureFX() - : m_dev(NULL) -{ -} - -bool GSTextureFX::Create(GSDevice* dev) -{ - m_dev = dev; - - return true; -} - diff --git a/plugins/GSdx_legacy/GSTextureFX11.cpp b/plugins/GSdx_legacy/GSTextureFX11.cpp deleted file mode 100644 index 77cf05d2fb..0000000000 --- a/plugins/GSdx_legacy/GSTextureFX11.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDevice11.h" -#include "resource.h" -#include "GSTables.h" - -bool GSDevice11::CreateTextureFX() -{ - HRESULT hr; - - D3D11_BUFFER_DESC bd; - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(VSConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_vs_cb); - - if(FAILED(hr)) return false; - - memset(&bd, 0, sizeof(bd)); - - bd.ByteWidth = sizeof(PSConstantBuffer); - bd.Usage = D3D11_USAGE_DEFAULT; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - hr = m_dev->CreateBuffer(&bd, NULL, &m_ps_cb); - - if(FAILED(hr)) return false; - - D3D11_SAMPLER_DESC sd; - - memset(&sd, 0, sizeof(sd)); - - sd.Filter = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3D11_FILTER_ANISOTROPIC : D3D11_FILTER_MIN_MAG_MIP_POINT; - sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.MinLOD = -FLT_MAX; - sd.MaxLOD = FLT_MAX; - sd.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - sd.ComparisonFunc = D3D11_COMPARISON_NEVER; - - hr = m_dev->CreateSamplerState(&sd, &m_palette_ss); - - if(FAILED(hr)) return false; - - hr = m_dev->CreateSamplerState(&sd, &m_rt_ss); - - if(FAILED(hr)) return false; - - // create layout - - VSSelector sel; - VSConstantBuffer cb; - - SetupVS(sel, &cb); - - // - - return true; -} - -void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) -{ - hash_map::const_iterator i = m_vs.find(sel); - - if(i == m_vs.end()) - { - string str[4]; - - str[0] = format("%d", sel.bppz); - str[1] = format("%d", sel.tme); - str[2] = format("%d", sel.fst); - str[3] = format("%d", sel.rtcopy); - - D3D_SHADER_MACRO macro[] = - { - {"VS_BPPZ", str[0].c_str()}, - {"VS_TME", str[1].c_str()}, - {"VS_FST", str[2].c_str()}, - {"VS_RTCOPY", str[3].c_str()}, - {NULL, NULL}, - }; - - 
D3D11_INPUT_ELEMENT_DESC layout[] = - { - {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - - GSVertexShader11 vs; - - vector shader; - theApp.LoadResource(IDR_TFX_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "tfx.fx", nullptr, "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il); - - m_vs[sel] = vs; - - i = m_vs.find(sel); - } - - if(m_vs_cb_cache.Update(cb)) - { - ID3D11DeviceContext* ctx = m_ctx; - - ctx->UpdateSubresource(m_vs_cb, 0, NULL, cb, 0, 0); - } - - VSSetShader(i->second.vs, m_vs_cb); - - IASetInputLayout(i->second.il); -} - -void GSDevice11::SetupGS(GSSelector sel) -{ - CComPtr gs; - - if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) // geometry shader works in every case, but not needed - { - hash_map >::const_iterator i = m_gs.find(sel); - - if(i != m_gs.end()) - { - gs = i->second; - } - else - { - string str[2]; - - str[0] = format("%d", sel.iip); - str[1] = format("%d", sel.prim); - - D3D_SHADER_MACRO macro[] = - { - {"GS_IIP", str[0].c_str()}, - {"GS_PRIM", str[1].c_str()}, - {NULL, NULL}, - }; - - vector shader; - theApp.LoadResource(IDR_TFX_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "tfx.fx", nullptr, "gs_main", macro, &gs); - - m_gs[sel] = gs; - } - } - - GSSetShader(gs); -} - -void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) -{ - hash_map >::const_iterator i = m_ps.find(sel); - - if(i == m_ps.end()) - { - string 
str[20]; - - str[0] = format("%d", sel.fst); - str[1] = format("%d", sel.wms); - str[2] = format("%d", sel.wmt); - str[3] = format("%d", sel.fmt); - str[4] = format("%d", sel.aem); - str[5] = format("%d", sel.tfx); - str[6] = format("%d", sel.tcc); - str[7] = format("%d", sel.atst); - str[8] = format("%d", sel.fog); - str[9] = format("%d", sel.clr1); - str[10] = format("%d", sel.fba); - str[11] = format("%d", sel.aout); - str[12] = format("%d", sel.ltf); - str[13] = format("%d", sel.colclip); - str[14] = format("%d", sel.date); - str[15] = format("%d", sel.spritehack); - str[16] = format("%d", sel.tcoffsethack); - str[17] = format("%d", sel.point_sampler); - str[18] = format("%d", sel.shuffle); - str[19] = format("%d", sel.read_ba); - - D3D_SHADER_MACRO macro[] = - { - {"PS_FST", str[0].c_str()}, - {"PS_WMS", str[1].c_str()}, - {"PS_WMT", str[2].c_str()}, - {"PS_FMT", str[3].c_str()}, - {"PS_AEM", str[4].c_str()}, - {"PS_TFX", str[5].c_str()}, - {"PS_TCC", str[6].c_str()}, - {"PS_ATST", str[7].c_str()}, - {"PS_FOG", str[8].c_str()}, - {"PS_CLR1", str[9].c_str()}, - {"PS_FBA", str[10].c_str()}, - {"PS_AOUT", str[11].c_str()}, - {"PS_LTF", str[12].c_str()}, - {"PS_COLCLIP", str[13].c_str()}, - {"PS_DATE", str[14].c_str()}, - {"PS_SPRITEHACK", str[15].c_str()}, - {"PS_TCOFFSETHACK", str[16].c_str()}, - {"PS_POINT_SAMPLER", str[17].c_str()}, - {"PS_SHUFFLE", str[18].c_str() }, - {"PS_READ_BA", str[19].c_str() }, - {NULL, NULL}, - }; - - CComPtr ps; - - vector shader; - theApp.LoadResource(IDR_TFX_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "tfx.fx", nullptr, "ps_main", macro, &ps); - - m_ps[sel] = ps; - - i = m_ps.find(sel); - } - - if(m_ps_cb_cache.Update(cb)) - { - ID3D11DeviceContext* ctx = m_ctx; - - ctx->UpdateSubresource(m_ps_cb, 0, NULL, cb, 0, 0); - } - - CComPtr ss0, ss1; - - if(sel.tfx != 4) - { - if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3)) - { - ssel.ltf = 0; - } - - hash_map >::const_iterator i = m_ps_ss.find(ssel); - - 
if(i != m_ps_ss.end()) - { - ss0 = i->second; - } - else - { - D3D11_SAMPLER_DESC sd, af; - - memset(&sd, 0, sizeof(sd)); - - af.Filter = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3D11_FILTER_ANISOTROPIC : D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; - sd.Filter = ssel.ltf ? af.Filter : D3D11_FILTER_MIN_MAG_MIP_POINT; - - sd.AddressU = ssel.tau ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressV = ssel.tav ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP; - sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - sd.MinLOD = -FLT_MAX; - sd.MaxLOD = FLT_MAX; - sd.MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - sd.ComparisonFunc = D3D11_COMPARISON_NEVER; - - m_dev->CreateSamplerState(&sd, &ss0); - - m_ps_ss[ssel] = ss0; - } - - if(sel.fmt >= 3) - { - ss1 = m_palette_ss; - } - } - - PSSetSamplerState(ss0, ss1, sel.date ? m_rt_ss : NULL); - - PSSetShader(i->second, m_ps_cb); -} - -void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) -{ - hash_map >::const_iterator i = m_om_dss.find(dssel); - - if(i == m_om_dss.end()) - { - D3D11_DEPTH_STENCIL_DESC dsd; - - memset(&dsd, 0, sizeof(dsd)); - - if(dssel.date) - { - dsd.StencilEnable = true; - dsd.StencilReadMask = 1; - dsd.StencilWriteMask = 1; - dsd.FrontFace.StencilFunc = D3D11_COMPARISON_EQUAL; - dsd.FrontFace.StencilPassOp = dssel.alpha_stencil ? D3D11_STENCIL_OP_ZERO : D3D11_STENCIL_OP_KEEP; - dsd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - dsd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; - dsd.BackFace.StencilFunc = D3D11_COMPARISON_EQUAL; - dsd.BackFace.StencilPassOp = dssel.alpha_stencil ? 
D3D11_STENCIL_OP_ZERO : D3D11_STENCIL_OP_KEEP; - dsd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; - dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; - } - - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) - { - static const D3D11_COMPARISON_FUNC ztst[] = - { - D3D11_COMPARISON_NEVER, - D3D11_COMPARISON_ALWAYS, - D3D11_COMPARISON_GREATER_EQUAL, - D3D11_COMPARISON_GREATER - }; - - dsd.DepthEnable = true; - dsd.DepthWriteMask = dssel.zwe ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; - dsd.DepthFunc = ztst[dssel.ztst]; - } - - CComPtr dss; - - m_dev->CreateDepthStencilState(&dsd, &dss); - - m_om_dss[dssel] = dss; - - i = m_om_dss.find(dssel); - } - - OMSetDepthStencilState(i->second, 1); - - hash_map >::const_iterator j = m_om_bs.find(bsel); - - if(j == m_om_bs.end()) - { - D3D11_BLEND_DESC bd; - - memset(&bd, 0, sizeof(bd)); - - bd.RenderTarget[0].BlendEnable = bsel.abe; - - if(bsel.abe) - { - int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; - - bd.RenderTarget[0].BlendOp = (D3D11_BLEND_OP)m_blendMapD3D9[i].op; - bd.RenderTarget[0].SrcBlend = (D3D11_BLEND)m_blendMapD3D9[i].src; - bd.RenderTarget[0].DestBlend = (D3D11_BLEND)m_blendMapD3D9[i].dst; - bd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - bd.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - bd.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - - // SRC* -> SRC1* - // Yes, this casting mess really is needed. 
I want to go back to C - - if(bd.RenderTarget[0].SrcBlend >= 3 && bd.RenderTarget[0].SrcBlend <= 6) - { - bd.RenderTarget[0].SrcBlend = (D3D11_BLEND)((int)bd.RenderTarget[0].SrcBlend + 13); - } - - if(bd.RenderTarget[0].DestBlend >= 3 && bd.RenderTarget[0].DestBlend <= 6) - { - bd.RenderTarget[0].DestBlend = (D3D11_BLEND)((int)bd.RenderTarget[0].DestBlend + 13); - } - - // Not very good but I don't wanna write another 81 row table - - if(bsel.negative) - { - if(bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_ADD) - { - bd.RenderTarget[0].BlendOp = D3D11_BLEND_OP_REV_SUBTRACT; - } - else if(bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_REV_SUBTRACT) - { - bd.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - } - else - ; // god knows, best just not to mess with it for now - } - - if(m_blendMapD3D9[i].bogus == 1) - { - (bsel.a == 0 ? bd.RenderTarget[0].SrcBlend : bd.RenderTarget[0].DestBlend) = D3D11_BLEND_ONE; - - const string afixstr = format("%d >> 7", afix); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; - - printf("Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); - } - } - - if(bsel.wr) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED; - if(bsel.wg) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN; - if(bsel.wb) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE; - if(bsel.wa) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA; - - CComPtr bs; - - m_dev->CreateBlendState(&bd, &bs); - - m_om_bs[bsel] = bs; - - j = m_om_bs.find(bsel); - } - - OMSetBlendState(j->second, (float)(int)afix / 0x80); -} diff --git a/plugins/GSdx_legacy/GSTextureFX9.cpp b/plugins/GSdx_legacy/GSTextureFX9.cpp deleted file mode 100644 index 66f735c17c..0000000000 --- a/plugins/GSdx_legacy/GSTextureFX9.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * 
http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDevice9.h" -#include "resource.h" -#include "GSTables.h" - -GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix) -{ - GSTexture* t = NULL; - - uint32 hash = (size << 20) | (msk << 10) | fix; - - hash_map::iterator i = m_mskfix.find(hash); - - if(i != m_mskfix.end()) - { - t = i->second; - } - else - { - t = CreateTexture(size, 1, D3DFMT_R32F); - - if(t) - { - GSTexture::GSMap m; - - if(t->Map(m)) - { - for(uint32 i = 0; i < size; i++) - { - ((float*)m.bits)[i] = (float)((i & msk) | fix) / size; - } - - t->Unmap(); - } - - m_mskfix[hash] = t; - } - } - - return t; -} - -void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb) -{ - hash_map::const_iterator i = m_vs.find(sel); - - if(i == m_vs.end()) - { - string str[5]; - - str[0] = format("%d", sel.bppz); - str[1] = format("%d", sel.tme); - str[2] = format("%d", sel.fst); - str[3] = format("%d", sel.logz); - str[4] = format("%d", sel.rtcopy); - - D3D_SHADER_MACRO macro[] = - { - {"VS_BPPZ", str[0].c_str()}, - {"VS_TME", str[1].c_str()}, - {"VS_FST", str[2].c_str()}, - {"VS_LOGZ", str[3].c_str()}, - {"VS_RTCOPY", str[4].c_str()}, - {NULL, NULL}, - }; - - 
static const D3DVERTEXELEMENT9 layout[] = - { - {0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0}, - {0, 8, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0}, - {0, 12, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 1}, - {0, 16, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0}, - D3DDECL_END() - }; - - GSVertexShader9 vs; - - vector shader; - theApp.LoadResource(IDR_TFX_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "tfx.fx", "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il); - - m_vs[sel] = vs; - - i = m_vs.find(sel); - } - - VSSetShader(i->second.vs, (const float*)cb, sizeof(*cb) / sizeof(GSVector4)); - - IASetInputLayout(i->second.il); -} - -void GSDevice9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) -{ - if(cb->WH.z > 0 && cb->WH.w > 0 && (sel.wms == 3 || sel.wmt == 3)) - { - GSVector4i size(cb->WH); - - if(sel.wms == 3) - { - if(GSTexture* t = CreateMskFix(size.z, cb->MskFix.x, cb->MskFix.z)) - { - m_dev->SetTexture(3, *(GSTexture9*)t); - } - } - - if(sel.wmt == 3) - { - if(GSTexture* t = CreateMskFix(size.w, cb->MskFix.y, cb->MskFix.w)) - { - m_dev->SetTexture(4, *(GSTexture9*)t); - } - } - } - - hash_map >::const_iterator i = m_ps.find(sel); - - if(i == m_ps.end()) - { - string str[17]; - - str[0] = format("%d", sel.fst); - str[1] = format("%d", sel.wms); - str[2] = format("%d", sel.wmt); - str[3] = format("%d", sel.fmt); - str[4] = format("%d", sel.aem); - str[5] = format("%d", sel.tfx); - str[6] = format("%d", sel.tcc); - str[7] = format("%d", sel.atst); - str[8] = format("%d", sel.fog); - str[9] = format("%d", sel.clr1); - str[10] = format("%d", sel.rt); - str[11] = format("%d", sel.ltf); - str[12] = format("%d", sel.colclip); - str[13] = format("%d", sel.date); - str[14] = format("%d", sel.spritehack); - str[15] = format("%d", sel.tcoffsethack); - str[16] = format("%d", sel.point_sampler); - - 
D3D_SHADER_MACRO macro[] = - { - {"PS_FST", str[0].c_str()}, - {"PS_WMS", str[1].c_str()}, - {"PS_WMT", str[2].c_str()}, - {"PS_FMT", str[3].c_str()}, - {"PS_AEM", str[4].c_str()}, - {"PS_TFX", str[5].c_str()}, - {"PS_TCC", str[6].c_str()}, - {"PS_ATST", str[7].c_str()}, - {"PS_FOG", str[8].c_str()}, - {"PS_CLR1", str[9].c_str()}, - {"PS_RT", str[10].c_str()}, - {"PS_LTF", str[11].c_str()}, - {"PS_COLCLIP", str[12].c_str()}, - {"PS_DATE", str[13].c_str()}, - {"PS_SPRITEHACK", str[14].c_str()}, - {"PS_TCOFFSETHACK", str[15].c_str()}, - {"PS_POINT_SAMPLER", str[16].c_str()}, - {NULL, NULL}, - }; - - CComPtr ps; - - vector shader; - theApp.LoadResource(IDR_TFX_FX, shader); - CompileShader((const char *)shader.data(), shader.size(), "tfx.fx", "ps_main", macro, &ps); - - m_ps[sel] = ps; - - i = m_ps.find(sel); - } - - PSSetShader(i->second, (const float*)cb, sizeof(*cb) / sizeof(GSVector4)); - - Direct3DSamplerState9* ss = NULL; - - if(sel.tfx != 4) - { - if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3)) - { - ssel.ltf = 0; - } - - hash_map::const_iterator i = m_ps_ss.find(ssel); - - if(i != m_ps_ss.end()) - { - ss = i->second; - } - else - { - ss = new Direct3DSamplerState9(); - - memset(ss, 0, sizeof(*ss)); - - ss->Anisotropic[0] = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3DTEXF_ANISOTROPIC : D3DTEXF_LINEAR; - ss->Anisotropic[1] = theApp.GetConfig("MaxAnisotropy", 0) && !theApp.GetConfig("paltex", 0) ? D3DTEXF_ANISOTROPIC : D3DTEXF_POINT; - ss->FilterMin[0] = ssel.ltf ? ss->Anisotropic[0] : D3DTEXF_POINT; - ss->FilterMag[0] = ssel.ltf ? ss->Anisotropic[0] : D3DTEXF_POINT; - ss->FilterMip[0] = ssel.ltf ? ss->Anisotropic[0] : D3DTEXF_POINT; - ss->FilterMin[1] = ss->Anisotropic[1]; - ss->FilterMag[1] = ss->Anisotropic[1]; - ss->FilterMip[1] = ss->Anisotropic[1]; - ss->AddressU = ssel.tau ? D3DTADDRESS_WRAP : D3DTADDRESS_CLAMP; - ss->AddressV = ssel.tav ? 
D3DTADDRESS_WRAP : D3DTADDRESS_CLAMP; - ss->MaxAnisotropy = theApp.GetConfig("MaxAnisotropy", 0); - ss->MaxLOD = ULONG_MAX; - - - m_ps_ss[ssel] = ss; - } - } - - PSSetSamplerState(ss); -} - -void GSDevice9::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) -{ - Direct3DDepthStencilState9* dss = NULL; - - hash_map::const_iterator i = m_om_dss.find(dssel); - - if(i == m_om_dss.end()) - { - dss = new Direct3DDepthStencilState9(); - - memset(dss, 0, sizeof(*dss)); - - if(dssel.date || dssel.fba) - { - dss->StencilEnable = true; - dss->StencilReadMask = 1; - dss->StencilWriteMask = dssel.alpha_stencil ? 3 : 2; - dss->StencilFunc = dssel.date ? D3DCMP_EQUAL : D3DCMP_ALWAYS; - dss->StencilPassOp = dssel.alpha_stencil ? D3DSTENCILOP_ZERO : dssel.fba ? D3DSTENCILOP_REPLACE : D3DSTENCILOP_KEEP; - dss->StencilFailOp = dssel.fba && !dssel.alpha_stencil ? D3DSTENCILOP_ZERO : D3DSTENCILOP_KEEP; - dss->StencilDepthFailOp = D3DSTENCILOP_KEEP; - dss->StencilRef = 3; - } - - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) - { - static const D3DCMPFUNC ztst[] = - { - D3DCMP_NEVER, - D3DCMP_ALWAYS, - D3DCMP_GREATEREQUAL, - D3DCMP_GREATER - }; - - dss->DepthEnable = true; - dss->DepthWriteMask = dssel.zwe; - dss->DepthFunc = ztst[dssel.ztst]; - } - - m_om_dss[dssel] = dss; - - i = m_om_dss.find(dssel); - } - - OMSetDepthStencilState(i->second); - - hash_map::const_iterator j = m_om_bs.find(bsel); - - if(j == m_om_bs.end()) - { - Direct3DBlendState9* bs = new Direct3DBlendState9(); - - memset(bs, 0, sizeof(*bs)); - - bs->BlendEnable = bsel.abe; - - if(bsel.abe) - { - int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; - - bs->BlendOp = (D3DBLENDOP)m_blendMapD3D9[i].op; - bs->SrcBlend = (D3DBLEND)m_blendMapD3D9[i].src; - bs->DestBlend = (D3DBLEND)m_blendMapD3D9[i].dst; - bs->BlendOpAlpha = D3DBLENDOP_ADD; - bs->SrcBlendAlpha = D3DBLEND_ONE; - bs->DestBlendAlpha = D3DBLEND_ZERO; - - // Not very good but I don't wanna write another 81 row table - - if(bsel.negative) 
- { - if(bs->BlendOp == D3DBLENDOP_ADD) - { - bs->BlendOp = D3DBLENDOP_REVSUBTRACT; - } - else if(bs->BlendOp == D3DBLENDOP_REVSUBTRACT) - { - bs->BlendOp = D3DBLENDOP_ADD; - } - else - ; // god knows, best just not to mess with it for now - } - - if(m_blendMapD3D9[i].bogus == 1) - { - (bsel.a == 0 ? bs->SrcBlend : bs->DestBlend) = D3DBLEND_ONE; - - const string afixstr = format("%d >> 7", afix); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; - - printf("Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); - } - } - - // this is not a typo; dx9 uses BGRA rather than the gs native RGBA, unlike dx10 - - if(bsel.wr) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_BLUE; - if(bsel.wg) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_GREEN; - if(bsel.wb) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_RED; - if(bsel.wa) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_ALPHA; - - m_om_bs[bsel] = bs; - - j = m_om_bs.find(bsel); - } - - OMSetBlendState(j->second, afix >= 0x80 ? 0xffffff : 0x020202 * afix); -} diff --git a/plugins/GSdx_legacy/GSTextureFXOGL.cpp b/plugins/GSdx_legacy/GSTextureFXOGL.cpp deleted file mode 100644 index 4c89ae9647..0000000000 --- a/plugins/GSdx_legacy/GSTextureFXOGL.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSDeviceOGL.h" -#include "GSTables.h" - -static const uint32 g_vs_cb_index = 20; -static const uint32 g_ps_cb_index = 21; -static const uint32 g_gs_cb_index = 22; - -void GSDeviceOGL::CreateTextureFX() -{ - m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); - m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); - - // warning 1 sampler by image unit. So you cannot reuse m_ps_ss... - m_palette_ss = CreateSampler(false, false, false); - glBindSampler(1, m_palette_ss); - - // Pre compile all Geometry & Vertex Shader - // It might cost a seconds at startup but it would reduce benchmark pollution - GL_PUSH("Compile GS"); - - for (uint32 key = 0; key < countof(m_gs); key++) { - GSSelector sel(key); - if (sel.point == sel.sprite) - m_gs[key] = 0; - else - m_gs[key] = CompileGS(GSSelector(key)); - } - - GL_POP(); - - GL_PUSH("Compile VS"); - - for (uint32 key = 0; key < countof(m_vs); key++) { - VSSelector sel(key); - m_vs[key] = CompileVS(sel, !GLLoader::found_GL_ARB_clip_control); - } - - GL_POP(); - - // Enable all bits for stencil operations. Technically 1 bit is - // enough but buffer is polluted with noise. Clear will be limited - // to the mask. 
- glStencilMask(0xFF); - for (uint32 key = 0; key < countof(m_om_dss); key++) { - m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); - } - - // Help to debug FS in apitrace - m_apitrace = CompilePS(PSSelector()); -} - -GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) -{ - GSDepthStencilOGL* dss = new GSDepthStencilOGL(); - - if (dssel.date) - { - dss->EnableStencil(); - dss->SetStencil(GL_EQUAL, GL_KEEP); - } - - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) - { - static const GLenum ztst[] = - { - GL_NEVER, - GL_ALWAYS, - GL_GEQUAL, - GL_GREATER - }; - dss->EnableDepth(); - dss->SetDepth(ztst[dssel.ztst], dssel.zwe); - } - - return dss; -} - -void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) -{ - GL_PUSH("UBO"); - if(m_vs_cb_cache.Update(vs_cb)) { - m_vs_cb->upload(vs_cb); - } - - if(m_ps_cb_cache.Update(ps_cb)) { - m_ps_cb->upload(ps_cb); - } - GL_POP(); -} - -void GSDeviceOGL::SetupVS(VSSelector sel) -{ - m_shader->VS(m_vs[sel]); -} - -void GSDeviceOGL::SetupGS(GSSelector sel) -{ - m_shader->GS(m_gs[sel]); -} - -void GSDeviceOGL::SetupPS(PSSelector sel) -{ - // ************************************************************* - // Static - // ************************************************************* - GLuint ps; - auto i = m_ps.find(sel); - - if (i == m_ps.end()) { - ps = CompilePS(sel); - m_ps[sel] = ps; - } else { - ps = i->second; - } - - // ************************************************************* - // Dynamic - // ************************************************************* - m_shader->PS(ps); -} - -void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel) -{ - PSSetSamplerState(m_ps_ss[ssel]); -} - -GLuint GSDeviceOGL::GetSamplerID(PSSamplerSelector ssel) -{ - return m_ps_ss[ssel]; -} - -GLuint GSDeviceOGL::GetPaletteSamplerID() -{ - return m_palette_ss; -} - -void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel) -{ - OMSetDepthStencilState(m_om_dss[dssel]); -} diff --git 
a/plugins/GSdx_legacy/GSTextureNull.cpp b/plugins/GSdx_legacy/GSTextureNull.cpp deleted file mode 100644 index 8a372573d8..0000000000 --- a/plugins/GSdx_legacy/GSTextureNull.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureNull.h" - -GSTextureNull::GSTextureNull() -{ - memset(&m_desc, 0, sizeof(m_desc)); -} - -GSTextureNull::GSTextureNull(int type, int w, int h, int format) -{ - m_desc.type = type; - m_desc.w = w; - m_desc.h = h; - m_desc.format = format; -} diff --git a/plugins/GSdx_legacy/GSTextureNull.h b/plugins/GSdx_legacy/GSTextureNull.h deleted file mode 100644 index 5d443b24e7..0000000000 --- a/plugins/GSdx_legacy/GSTextureNull.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTexture.h" - -class GSTextureNull : public GSTexture -{ - struct {int type, w, h, format;} m_desc; - -public: - GSTextureNull(); - GSTextureNull(int type, int w, int h, int format); - - int GetType() const {return m_desc.type;} - int GetFormat() const {return m_desc.format;} - - bool Update(const GSVector4i& r, const void* data, int pitch) {return true;} - bool Map(GSMap& m, const GSVector4i* r) {return false;} - void Unmap() {} - bool Save(const string& fn, bool user_image = false, bool dds = false) { return false; } -}; diff --git a/plugins/GSdx_legacy/GSTextureOGL.cpp b/plugins/GSdx_legacy/GSTextureOGL.cpp deleted file mode 100644 index 15ca86b3e7..0000000000 --- a/plugins/GSdx_legacy/GSTextureOGL.cpp +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include -#include "GSTextureOGL.h" -#include "GLState.h" -#include "GSPng.h" - -#ifdef ENABLE_OGL_DEBUG_MEM_BW -extern uint64 g_real_texture_upload_byte; -#endif - -// FIXME find the optimal number of PBO -#define PBO_POOL_SIZE 8 - -// FIXME OGL4: investigate, only 1 unpack buffer always bound -namespace PboPool { - - GLuint m_pool[PBO_POOL_SIZE]; - uptr m_offset[PBO_POOL_SIZE]; - char* m_map[PBO_POOL_SIZE]; - uint32 m_current_pbo = 0; - uint32 m_size; - bool m_texture_storage; - GLsync m_fence[PBO_POOL_SIZE]; - const uint32 m_pbo_size = 8*1024*1024; - - // Option for buffer storage - // XXX: actually does I really need coherent and barrier??? - // As far as I understand glTexSubImage2D is a client-server transfer so no need to make - // the value visible to the server - const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; - const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; - - // Perf impact (test was only done on a gs dump): - // Normal (fast): Message:Buffer detailed info: Buffer object 9 (bound to - // GL_PIXEL_UNPACK_BUFFER_ARB, usage hint is GL_STREAM_COPY) will use VIDEO - // memory as the source for buffer object operations. 
- // - // Persistent (slower): Message:Buffer detailed info: Buffer object 8 - // (bound to GL_PIXEL_UNPACK_BUFFER_ARB, usage hint is GL_DYNAMIC_DRAW) - // will use DMA CACHED memory as the source for buffer object operations - void Init() { - glGenBuffers(countof(m_pool), m_pool); - m_texture_storage = GLLoader::found_GL_ARB_buffer_storage; - - for (size_t i = 0; i < countof(m_pool); i++) { - BindPbo(); - - if (m_texture_storage) { - glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags); - m_map[m_current_pbo] = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags); - m_fence[m_current_pbo] = 0; - } else { - glBufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_COPY); - m_map[m_current_pbo] = NULL; - } - - NextPbo(); - } - UnbindPbo(); - } - - char* Map(uint32 size) { - char* map; - m_size = size; - - if (m_size > m_pbo_size) { - fprintf(stderr, "BUG: PBO too small %d but need %d\n", m_pbo_size, m_size); - } - - if (m_texture_storage) { - if (m_offset[m_current_pbo] + m_size >= m_pbo_size) { - //NextPbo(); // For test purpose - NextPboWithSync(); - } - - // Note: texsubimage will access currently bound buffer - // Pbo ready let's get a pointer - BindPbo(); - - map = m_map[m_current_pbo] + m_offset[m_current_pbo]; - - } else { - GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_RANGE_BIT; - - if (m_offset[m_current_pbo] + m_size >= m_pbo_size) { - NextPbo(); - - flags &= ~GL_MAP_INVALIDATE_RANGE_BIT; - flags |= GL_MAP_INVALIDATE_BUFFER_BIT; - } - - // Pbo ready let's get a pointer - BindPbo(); - - // Be sure the map is aligned - map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size, flags); - } - - return map; - } - - void Unmap() { - if (m_texture_storage) { - glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size); - } else { - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - } - } - - uptr Offset() { - return 
m_offset[m_current_pbo]; - } - - void Destroy() { - if (m_texture_storage) { - for (size_t i = 0; i < countof(m_pool); i++) { - m_map[i] = NULL; - m_offset[i] = 0; - glDeleteSync(m_fence[i]); - - // Don't know if we must do it - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[i]); - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - glDeleteBuffers(countof(m_pool), m_pool); - } - - void BindPbo() { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[m_current_pbo]); - } - - void NextPbo() { - m_current_pbo = (m_current_pbo + 1) & (countof(m_pool)-1); - // Mark new PBO as free - m_offset[m_current_pbo] = 0; - } - - void NextPboWithSync() { - m_fence[m_current_pbo] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - NextPbo(); - if (m_fence[m_current_pbo]) { -#ifdef ENABLE_OGL_DEBUG_FENCE - GLenum status = glClientWaitSync(m_fence[m_current_pbo], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#else - glClientWaitSync(m_fence[m_current_pbo], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#endif - glDeleteSync(m_fence[m_current_pbo]); - m_fence[m_current_pbo] = 0; - -#ifdef ENABLE_OGL_DEBUG_FENCE - if (status != GL_ALREADY_SIGNALED) { - fprintf(stderr, "GL_PIXEL_UNPACK_BUFFER: Sync Sync! 
Buffer too small\n"); - } -#endif - } - } - - void UnbindPbo() { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - - void EndTransfer() { - // Note: keep offset aligned for SSE/AVX - m_offset[m_current_pbo] = (m_offset[m_current_pbo] + m_size + 63) & ~0x3F; - } -} - -// FIXME: check if it possible to always use those setup by default -// glPixelStorei(GL_PACK_ALIGNMENT, 1); -// glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - -GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) - : m_pbo_size(0), m_dirty(false), m_clean(false), m_local_buffer(NULL), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0) -{ - // OpenGL didn't like dimensions of size 0 - m_size.x = max(1,w); - m_size.y = max(1,h); - m_format = format; - m_type = type; - m_fbo_read = fbo_read; - m_texture_id = 0; - - // Bunch of constant parameter - switch (m_format) { - // 1 Channel integer - case GL_R32UI: - case GL_R32I: - m_int_format = GL_RED_INTEGER; - m_int_type = (m_format == GL_R32UI) ? GL_UNSIGNED_INT : GL_INT; - m_int_alignment = 4; - m_int_shift = 2; - break; - case GL_R16UI: - m_int_format = GL_RED_INTEGER; - m_int_type = GL_UNSIGNED_SHORT; - m_int_alignment = 2; - m_int_shift = 1; - break; - - // 1 Channel normalized - case GL_R8: - m_int_format = GL_RED; - m_int_type = GL_UNSIGNED_BYTE; - m_int_alignment = 1; - m_int_shift = 0; - break; - - // 4 channel normalized - case GL_RGBA16: - m_int_format = GL_RGBA; - m_int_type = GL_UNSIGNED_SHORT; - m_int_alignment = 8; - m_int_shift = 3; - break; - case GL_RGBA8: - m_int_format = GL_RGBA; - m_int_type = GL_UNSIGNED_BYTE; - m_int_alignment = 4; - m_int_shift = 2; - break; - - // 4 channel integer - case GL_RGBA16I: - case GL_RGBA16UI: - m_int_format = GL_RGBA_INTEGER; - m_int_type = (m_format == GL_R16UI) ? 
GL_UNSIGNED_SHORT : GL_SHORT; - m_int_alignment = 8; - m_int_shift = 3; - break; - - // 4 channel float - case GL_RGBA32F: - m_int_format = GL_RGBA; - m_int_type = GL_FLOAT; - m_int_alignment = 16; - m_int_shift = 4; - break; - case GL_RGBA16F: - m_int_format = GL_RGBA; - m_int_type = GL_HALF_FLOAT; - m_int_alignment = 8; - m_int_shift = 3; - break; - - // Special - case 0: - case GL_DEPTH32F_STENCIL8: - // Backbuffer & dss aren't important - m_int_format = 0; - m_int_type = 0; - m_int_alignment = 0; - m_int_shift = 0; - break; - - default: - m_int_format = 0; - m_int_type = 0; - m_int_alignment = 0; - m_int_shift = 0; - ASSERT(0); - } - - // Generate & Allocate the buffer - switch (m_type) { - case GSTexture::Offscreen: - // 8B is the worst case for depth/stencil - // FIXME I think it is only used for color. So you can save half of the size - m_local_buffer = (uint8*)_aligned_malloc(m_size.x * m_size.y * 4, 32); - case GSTexture::Texture: - case GSTexture::RenderTarget: - case GSTexture::DepthStencil: - glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id); - glTextureStorage2D(m_texture_id, 1+GL_TEX_LEVEL_0, m_format, m_size.x, m_size.y); - if (m_format == GL_R8) { - // Emulate DX behavior, beside it avoid special code in shader to differentiate - // palette texture from a GL_RGBA target or a GL_R texture. 
- glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED); - } - break; - case GSTexture::Backbuffer: - default: - break; - } -} - -GSTextureOGL::~GSTextureOGL() -{ - /* Unbind the texture from our local state */ - - if (m_texture_id == GLState::rt) - GLState::rt = 0; - if (m_texture_id == GLState::ds) - GLState::ds = 0; - for (size_t i = 0; i < countof(GLState::tex_unit); i++) { - if (m_texture_id == GLState::tex_unit[i]) - GLState::tex_unit[i] = 0; - } - - glDeleteTextures(1, &m_texture_id); - - if (m_local_buffer) - _aligned_free(m_local_buffer); -} - -void GSTextureOGL::Invalidate() -{ - if (m_dirty && glInvalidateTexImage) { - glInvalidateTexImage(m_texture_id, GL_TEX_LEVEL_0); - m_dirty = false; - } -} - -bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) -{ - ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); - - // Default upload path for the texture is the Map/Unmap - // This path is mostly used for palette. But also for texture that could - // overflow the pbo buffer - // Data upload is rather small typically 64B or 1024B. So don't bother with PBO - // and directly send the data to the GL synchronously - - m_dirty = true; - m_clean = false; - - uint32 row_byte = r.width() << m_int_shift; - uint32 map_size = r.height() * row_byte; -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_real_texture_upload_byte += map_size; -#endif - - glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); - -#if 0 - if (r.height() == 1) { - // Palette data. Transfer is small either 64B or 1024B. - // Sometimes it is faster, sometimes slower. 
- glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); - return true; - } -#endif - - GL_PUSH("Upload Texture %d", m_texture_id); - - // The easy solution without PBO -#if 0 - // Likely a bad texture - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); - - glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior -#endif - - // The complex solution with PBO -#if 1 - char* src = (char*)data; - char* map = PboPool::Map(map_size); - - // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch - // Note: row_byte != pitch - for (int h = 0; h < r.height(); h++) { - memcpy(map, src, row_byte); - map += row_byte; - src += pitch; - } - - PboPool::Unmap(); - - glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); - - // FIXME OGL4: investigate, only 1 unpack buffer always bound - PboPool::UnbindPbo(); - - PboPool::EndTransfer(); -#endif - - GL_POP(); - - return true; -} - -bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r) -{ - GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y); - - // LOTS OF CRAP CODE!!!! PLEASE FIX ME !!! - if (m_type == GSTexture::Offscreen) { - // The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx - // architecture is waiting the data right now. - -#if 0 - // Maybe it is as good as the code below. 
I don't know - // With openGL 4.5 you can use glGetTextureSubImage - - glGetTextureImage(m_texture_id, GL_TEX_LEVEL_0, m_int_format, m_int_type, 1024*1024*16, m_local_buffer); - -#else - - // Bind the texture to the read framebuffer to avoid any disturbance - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); - - glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment); - glReadPixels(r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, m_local_buffer); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - -#endif - - m.bits = m_local_buffer; - m.pitch = m_size.x << m_int_shift; - - return true; - } else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { - GL_PUSH("Upload Texture %d", m_texture_id); // POP is in Unmap - - m_dirty = true; - m_clean = false; - - uint32 row_byte = r.width() << m_int_shift; - uint32 map_size = r.height() * row_byte; - - m.bits = (uint8*)PboPool::Map(map_size); - m.pitch = row_byte; - -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_real_texture_upload_byte += map_size; -#endif - - // Save the area for the unmap - m_r_x = r.x; - m_r_y = r.y; - m_r_w = r.width(); - m_r_h = r.height(); - - return true; - } - - return false; -} - -void GSTextureOGL::Unmap() -{ - if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { - - PboPool::Unmap(); - - glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, (const void*)PboPool::Offset()); - - // FIXME OGL4: investigate, only 1 unpack buffer always bound - PboPool::UnbindPbo(); - - PboPool::EndTransfer(); - - GL_POP(); // PUSH is in Map - } -} - -bool GSTextureOGL::Save(const string& fn, bool user_image, bool dds) -{ - // Collect the texture data - uint32 pitch = 4 * m_size.x; - uint32 buf_size = pitch * m_size.y * 2;// Note *2 for security (depth/stencil) - std::unique_ptr image(new uint8[buf_size]); -#ifdef ENABLE_OGL_DEBUG - 
GSPng::Format fmt = GSPng::RGB_A_PNG; -#else - GSPng::Format fmt = GSPng::RGB_PNG; -#endif - - if (IsBackbuffer()) { - glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); - } else if(IsDss()) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0); - glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image.get()); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - - fmt = GSPng::RGB_A_PNG; - } else if(m_format == GL_R32I) { - glGetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image.get()); - - fmt = GSPng::R32I_PNG; - } else { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); - - if (m_format == GL_RGBA8) { - glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); - } - else if (m_format == GL_R16UI) - { - glReadPixels(0, 0, m_size.x, m_size.y, GL_RED_INTEGER, GL_UNSIGNED_SHORT, image.get()); - fmt = GSPng::R16I_PNG; - } - else if (m_format == GL_R8) - { - fmt = GSPng::R8I_PNG; - glReadPixels(0, 0, m_size.x, m_size.y, GL_RED, GL_UNSIGNED_BYTE, image.get()); - } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - - int compression = user_image ? 
Z_BEST_COMPRESSION : theApp.GetConfig("png_compression_level", Z_BEST_SPEED); - return GSPng::Save(fmt, fn, image.get(), m_size.x, m_size.y, pitch, compression); -} - -uint32 GSTextureOGL::GetMemUsage() -{ - switch (m_type) { - case GSTexture::Offscreen: - return m_size.x * m_size.y * (4 + m_int_alignment); - case GSTexture::Texture: - case GSTexture::RenderTarget: - return m_size.x * m_size.y * m_int_alignment; - case GSTexture::DepthStencil: - return m_size.x * m_size.y * 8; - case GSTexture::Backbuffer: - default: - return 0; - } -} diff --git a/plugins/GSdx_legacy/GSTextureOGL.h b/plugins/GSdx_legacy/GSTextureOGL.h deleted file mode 100644 index def7ce16d7..0000000000 --- a/plugins/GSdx_legacy/GSTextureOGL.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTexture.h" - -namespace PboPool { - void BindPbo(); - void UnbindPbo(); - void NextPbo(); - void NextPboWithSync(); - - char* Map(uint32 size); - void Unmap(); - uptr Offset(); - void EndTransfer(); - - void Init(); - void Destroy(); -} - -class GSTextureOGL final : public GSTexture -{ - private: - GLuint m_texture_id; // the texture id - int m_pbo_size; - GLuint m_fbo_read; - bool m_dirty; - bool m_clean; - - uint8* m_local_buffer; - // Avoid alignment constrain - //GSVector4i m_r; - int m_r_x; - int m_r_y; - int m_r_w; - int m_r_h; - - - // internal opengl format/type/alignment - GLenum m_int_format; - GLenum m_int_type; - uint32 m_int_alignment; - uint32 m_int_shift; - - public: - explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read); - virtual ~GSTextureOGL(); - - void Invalidate() final; - bool Update(const GSVector4i& r, const void* data, int pitch) final; - bool Map(GSMap& m, const GSVector4i* r = NULL) final; - void Unmap() final; - bool Save(const string& fn, bool user_image = false, bool dds = false) final; - - bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); } - bool IsDss() { return (m_type == GSTexture::DepthStencil); } - - uint32 GetID() final { return m_texture_id; } - bool HasBeenCleaned() { return m_clean; } - void WasAttached() { m_clean = false; m_dirty = true; } - void WasCleaned() { m_clean = true; } - - uint32 GetMemUsage(); -}; diff --git a/plugins/GSdx_legacy/GSTextureSW.cpp b/plugins/GSdx_legacy/GSTextureSW.cpp deleted file mode 100644 index 0baba7282b..0000000000 --- a/plugins/GSdx_legacy/GSTextureSW.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any 
later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSTextureSW.h" -#include "GSPng.h" - -GSTextureSW::GSTextureSW(int type, int width, int height) -{ - m_mapped.clear(); - m_size = GSVector2i(width, height); - m_type = type; - m_format = 0; - m_pitch = ((width << 2) + 31) & ~31; - m_data = _aligned_malloc(m_pitch * height, 32); -} - -GSTextureSW::~GSTextureSW() -{ - _aligned_free(m_data); -} - -bool GSTextureSW::Update(const GSVector4i& r, const void* data, int pitch) -{ - GSMap m; - - if(m_data != NULL && Map(m, &r)) - { - uint8* RESTRICT src = (uint8*)data; - uint8* RESTRICT dst = m.bits; - - int rowbytes = r.width() << 2; - - for(int h = r.height(); h > 0; h--, src += pitch, dst += m.pitch) - { - memcpy(dst, src, rowbytes); - } - - Unmap(); - - return true; - } - - return false; -} - -bool GSTextureSW::Map(GSMap& m, const GSVector4i* r) -{ - GSVector4i r2 = r != NULL ? 
*r : GSVector4i(0, 0, m_size.x, m_size.y); - - if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y) - { - if (!m_mapped.test_and_set()) - { - m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2); - m.pitch = m_pitch; - - return true; - } - } - - return false; -} - -void GSTextureSW::Unmap() -{ - m_mapped.clear(); -} - -bool GSTextureSW::Save(const string& fn, bool user_image, bool dds) -{ - if(dds) return false; // not implemented - -#ifdef ENABLE_OGL_DEBUG - GSPng::Format fmt = GSPng::RGB_A_PNG; -#else - GSPng::Format fmt = GSPng::RGB_PNG; -#endif - int compression = user_image ? Z_BEST_COMPRESSION : theApp.GetConfig("png_compression_level", Z_BEST_SPEED); - return GSPng::Save(fmt, fn, static_cast(m_data), m_size.x, m_size.y, m_pitch, compression); -} diff --git a/plugins/GSdx_legacy/GSTextureSW.h b/plugins/GSdx_legacy/GSTextureSW.h deleted file mode 100644 index 52154ee297..0000000000 --- a/plugins/GSdx_legacy/GSTextureSW.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSTexture.h" - -class GSTextureSW : public GSTexture -{ - // mem texture, always 32-bit rgba (might add 8-bit for palette if needed) - - int m_pitch; - void* m_data; - std::atomic_flag m_mapped; - -public: - GSTextureSW(int type, int width, int height); - virtual ~GSTextureSW(); - - bool Update(const GSVector4i& r, const void* data, int pitch); - bool Map(GSMap& m, const GSVector4i* r); - void Unmap(); - bool Save(const string& fn, bool user_image = false, bool dds = false); -}; diff --git a/plugins/GSdx_legacy/GSThread.cpp b/plugins/GSdx_legacy/GSThread.cpp deleted file mode 100644 index 860b7384f2..0000000000 --- a/plugins/GSdx_legacy/GSThread.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSThread_CXX11.h" - -GSThread::GSThread() -{ - #ifdef _WIN32 - - m_ThreadId = 0; - m_hThread = NULL; - - #else - - #endif -} - -GSThread::~GSThread() -{ - CloseThread(); -} - -#ifdef _WIN32 - -DWORD WINAPI GSThread::StaticThreadProc(void* lpParam) -{ - ((GSThread*)lpParam)->ThreadProc(); - - return 0; -} - -#else - -void* GSThread::StaticThreadProc(void* param) -{ - ((GSThread*)param)->ThreadProc(); -#ifndef _STD_THREAD_ // exit is done implicitly by std::thread - pthread_exit(NULL); -#endif - return NULL; -} - -#endif - -void GSThread::CreateThread() -{ - #ifdef _WIN32 - - m_hThread = ::CreateThread(NULL, 0, StaticThreadProc, (void*)this, 0, &m_ThreadId); - - #else - - #ifdef _STD_THREAD_ - t = new thread(StaticThreadProc,(void*)this); - #else - pthread_attr_init(&m_thread_attr); - pthread_create(&m_thread, &m_thread_attr, StaticThreadProc, (void*)this); - #endif - - #endif -} - -void GSThread::CloseThread() -{ - #ifdef _WIN32 - - if(m_hThread != NULL) - { - if(WaitForSingleObject(m_hThread, 5000) != WAIT_OBJECT_0) - { - printf("GSdx: WARNING: GSThread Thread did not close itself in time. Assuming hung. Terminating.\n"); - TerminateThread(m_hThread, 1); - } - - CloseHandle(m_hThread); - - m_hThread = NULL; - m_ThreadId = 0; - } - - #else - // Should be tested on windows too one day, native handle should be disabled there though, or adapted to windows thread - #ifdef _STD_THREAD_ - - #define _NATIVE_HANDLE_ // Using std::thread native handle, allows to just use posix stuff. - #ifdef _NATIVE_HANDLE_ // std::thread join seems to be bugged, have to test it again every now and then, it did work at some point(gcc 5), seems there is bug in system lib... - pthread_t m_thread = t->native_handle(); - void *ret = NULL; - pthread_join(m_thread, &ret); - /* We are sure thread is dead, not so bad. - * Still no way to to delete that crap though... Really, wtf is this standard?? 
- * I guess we will have to wait that someone debug either the implementation or change standard. - * There should be a moderate memory leak for now, I am trying to find a way to fix it. - * 3kinox - */ - #else - if(t->joinable()) - { - t->join(); - } - delete(t); - #endif - #else - void* ret = NULL; - - pthread_join(m_thread, &ret); - pthread_attr_destroy(&m_thread_attr); - #endif - #endif -} - diff --git a/plugins/GSdx_legacy/GSThread.h b/plugins/GSdx_legacy/GSThread.h deleted file mode 100644 index 9594a793f3..0000000000 --- a/plugins/GSdx_legacy/GSThread.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSdx.h" - -// http://software.intel.com/en-us/blogs/2012/11/06/exploring-intel-transactional-synchronization-extensions-with-intel-software -#if 0 -class TransactionScope -{ -public: - class Lock - { - std::atomic state; - - public: - Lock() - : state(false) - { - } - - void lock() - { - bool expected_value = false; - while(state.compare_exchange_strong(expected_value, true)) - { - do {_mm_pause();} while(state); - } - } - - void unlock() - { - state = false; - } - - bool isLocked() const - { - return state.load(); - } - }; - -private: - Lock& fallBackLock; - - TransactionScope(); - -public: - TransactionScope(Lock& fallBackLock_, int max_retries = 3) - : fallBackLock(fallBackLock_) - { - // The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be - // absent or disabled (see errata HSD136 and specification change at - // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/4th-gen-core-family-desktop-specification-update.pdf) - // This can cause builds for AVX2 CPUs to fail with GCC/Clang on Linux, - // so check that the RTM instructions are actually available. 
- #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) - - int nretries = 0; - - while(1) - { - ++nretries; - - unsigned status = _xbegin(); - - if(status == _XBEGIN_STARTED) - { - if(!fallBackLock.isLocked()) return; - - _xabort(0xff); - } - - if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED)) - { - while(fallBackLock.isLocked()) _mm_pause(); - } - else if(!(status & _XABORT_RETRY)) - { - break; - } - - if(nretries >= max_retries) - { - break; - } - } - - #endif - - fallBackLock.lock(); - } - - ~TransactionScope() - { - if(fallBackLock.isLocked()) - { - fallBackLock.unlock(); - } - #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) - else - { - _xend(); - } - #endif - } -}; -#endif diff --git a/plugins/GSdx_legacy/GSThread_CXX11.h b/plugins/GSdx_legacy/GSThread_CXX11.h deleted file mode 100644 index 378a571f14..0000000000 --- a/plugins/GSdx_legacy/GSThread_CXX11.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSdx.h" -#include "Utilities/boost_spsc_queue.hpp" - -class IGSThread -{ -protected: - virtual void ThreadProc() = 0; -}; - -// let us use std::thread for now, comment out the definition to go back to pthread -// There are currently some bugs/limitations to std::thread (see various comment) -// For the moment let's keep pthread but uses new std object (mutex, cond_var) -//#define _STD_THREAD_ - -#ifdef _WIN32 - -class GSThread : public IGSThread -{ - DWORD m_ThreadId; - HANDLE m_hThread; - - static DWORD WINAPI StaticThreadProc(void* lpParam); - -protected: - void CreateThread(); - void CloseThread(); - -public: - GSThread(); - virtual ~GSThread(); -}; - -#else - -#ifdef _STD_THREAD_ -#include -#else -#include -#endif - -class GSThread : public IGSThread -{ - #ifdef _STD_THREAD_ - std::thread *t; - #else - pthread_attr_t m_thread_attr; - pthread_t m_thread; - #endif - static void* StaticThreadProc(void* param); - -protected: - void CreateThread(); - void CloseThread(); - -public: - GSThread(); - virtual ~GSThread(); -}; - -#endif - -template class IGSJobQueue : public GSThread -{ -public: - IGSJobQueue() {} - virtual ~IGSJobQueue() {} - - virtual bool IsEmpty() const = 0; - virtual void Push(const T& item) = 0; - virtual void Wait() = 0; - - virtual void Process(T& item) = 0; - virtual int GetPixels(bool reset) = 0; -}; - -template class GSJobQueue : public IGSJobQueue -{ -protected: - std::atomic m_count; - std::atomic m_exit; - ringbuffer_base m_queue; - - std::mutex m_lock; - std::condition_variable m_empty; - std::condition_variable m_notempty; - - void ThreadProc() { - std::unique_lock l(m_lock); - - while (true) { - - while (m_count == 0) { - if (m_exit.load(memory_order_acquire)) return; - m_notempty.wait(l); - } - - l.unlock(); - - int16_t consumed = 0; - for (int16_t nb = m_count; nb >= 0; nb--) { - if (m_queue.consume_one(*this)) - consumed++; - } - - l.lock(); - - m_count -= 
consumed; - - if (m_count <= 0) - m_empty.notify_one(); - - } - } - -public: - GSJobQueue() : - m_count(0), - m_exit(false) - { - this->CreateThread(); - } - - virtual ~GSJobQueue() { - m_exit.store(true, memory_order_release); - m_notempty.notify_one(); - this->CloseThread(); - } - - bool IsEmpty() const { - ASSERT(m_count >= 0); - - return m_count == 0; - } - - void Push(const T& item) { - while(!m_queue.push(item)) - std::this_thread::yield(); - - std::unique_lock l(m_lock); - - m_count++; - - l.unlock(); - - m_notempty.notify_one(); - } - - void Wait() { - if (m_count > 0) { - std::unique_lock l(m_lock); - while (m_count > 0) { - m_empty.wait(l); - } - } - - ASSERT(m_count == 0); - } - - void operator() (T& item) { - this->Process(item); - } -}; diff --git a/plugins/GSdx_legacy/GSUniformBufferOGL.h b/plugins/GSdx_legacy/GSUniformBufferOGL.h deleted file mode 100644 index 7237f96d5e..0000000000 --- a/plugins/GSdx_legacy/GSUniformBufferOGL.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GLState.h" - -#ifdef ENABLE_OGL_DEBUG_MEM_BW -extern uint64 g_uniform_upload_byte; -#endif - - -class GSUniformBufferOGL { - GLuint buffer; // data object - GLuint index; // GLSL slot - uint32 size; // size of the data - -public: - GSUniformBufferOGL(GLuint index, uint32 size) : index(index) - , size(size) - { - glGenBuffers(1, &buffer); - bind(); - allocate(); - attach(); - } - - void bind() - { - if (GLState::ubo != buffer) { - GLState::ubo = buffer; - glBindBuffer(GL_UNIFORM_BUFFER, buffer); - } - } - - void allocate() - { - glBufferData(GL_UNIFORM_BUFFER, size, NULL, GL_DYNAMIC_DRAW); - } - - void attach() - { - // From the opengl manpage: - // glBindBufferBase also binds buffer to the generic buffer binding point specified by target - GLState::ubo = buffer; - glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer); - } - - void upload(const void* src) - { - bind(); - // glMapBufferRange allow to set various parameter but the call is - // synchronous whereas glBufferSubData could be asynchronous. 
- // TODO: investigate the extension ARB_invalidate_subdata - glBufferSubData(GL_UNIFORM_BUFFER, 0, size, src); -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_uniform_upload_byte += size; -#endif - } - - ~GSUniformBufferOGL() { - glDeleteBuffers(1, &buffer); - } -}; - -#define UBO_BUFFER_SIZE (4*1024*1024) - -class GSUniformBufferStorageOGL { - GLuint buffer; // data object - GLuint index; // GLSL slot - uint32 size; // size of the data - uint8* m_buffer_ptr; - uint32 m_offset; - -public: - GSUniformBufferStorageOGL(GLuint index, uint32 size) : index(index) - , size(size), m_offset(0) - { - glGenBuffers(1, &buffer); - bind(); - allocate(); - attach(); - } - - void bind() - { - if (GLState::ubo != buffer) { - GLState::ubo = buffer; - glBindBuffer(GL_UNIFORM_BUFFER, buffer); - } - } - - void allocate() - { - const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT /*| GL_MAP_COHERENT_BIT */; - const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; - const GLbitfield create_flags = common_flags /*| GL_CLIENT_STORAGE_BIT */; - - GLsizei buffer_size = UBO_BUFFER_SIZE; - glBufferStorage(GL_UNIFORM_BUFFER, buffer_size, NULL, create_flags); - m_buffer_ptr = (uint8*) glMapBufferRange(GL_UNIFORM_BUFFER, 0, buffer_size, map_flags); - ASSERT(m_buffer_ptr); - } - - void attach() - { - // From the opengl manpage: - // glBindBufferBase also binds buffer to the generic buffer binding point specified by target - GLState::ubo = buffer; - //glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer); - glBindBufferRange(GL_UNIFORM_BUFFER, index, buffer, m_offset, size); - } - - void upload(const void* src) - { -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_uniform_upload_byte += size; -#endif - - memcpy(m_buffer_ptr + m_offset, src, size); - - attach(); - glFlushMappedBufferRange(GL_UNIFORM_BUFFER, m_offset, size); - - m_offset = (m_offset + size + 255u) & ~0xFF; - if (m_offset >= UBO_BUFFER_SIZE) - m_offset = 0; - } - - ~GSUniformBufferStorageOGL() { - bind(); - 
glUnmapBuffer(GL_UNIFORM_BUFFER); - glDeleteBuffers(1, &buffer); - } -}; - -#undef UBO_BUFFER_SIZE diff --git a/plugins/GSdx_legacy/GSUtil.cpp b/plugins/GSdx_legacy/GSUtil.cpp deleted file mode 100644 index 2afded494d..0000000000 --- a/plugins/GSdx_legacy/GSUtil.cpp +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GS.h" -#include "GSUtil.h" -#include "xbyak/xbyak_util.h" - -#ifdef _WIN32 -#include "GSDeviceDX.h" -#include -#include "svnrev.h" -#else -#define SVN_REV 0 -#define SVN_MODS 0 -#endif - -const char* GSUtil::GetLibName() -{ - // TODO: critsec - - static string str; - - if(str.empty()) - { - str = "GSdx"; - - #ifdef _WIN32 - str += format(" %lld", SVN_REV); - if(SVN_MODS) str += "m"; - #endif - - #ifdef _M_AMD64 - str += " 64-bit"; - #endif - - list sl; - - #ifdef __INTEL_COMPILER - sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100)); - #elif _MSC_VER - sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100)); - #elif __GNUC__ - sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)); - #endif - - #if _M_SSE >= 0x501 - sl.push_back("AVX2"); - #elif _M_SSE >= 0x500 - sl.push_back("AVX"); - #elif _M_SSE >= 0x402 - sl.push_back("SSE42"); - #elif _M_SSE >= 0x401 - sl.push_back("SSE41"); - #elif _M_SSE >= 0x301 - sl.push_back("SSSE3"); - #elif _M_SSE >= 0x200 - sl.push_back("SSE2"); - #elif _M_SSE >= 0x100 - sl.push_back("SSE"); - #endif - - for(list::iterator i = sl.begin(); i != sl.end(); ) - { - if(i == sl.begin()) str += " ("; - str += *i; - str += ++i != sl.end() ? 
", " : ")"; - } - } - - return str.c_str(); -} - -static class GSUtilMaps -{ -public: - uint8 PrimClassField[8]; - uint8 VertexCountField[8]; - uint8 ClassVertexCountField[4]; - uint32 CompatibleBitsField[64][2]; - uint32 SharedBitsField[64][2]; - - GSUtilMaps() - { - PrimClassField[GS_POINTLIST] = GS_POINT_CLASS; - PrimClassField[GS_LINELIST] = GS_LINE_CLASS; - PrimClassField[GS_LINESTRIP] = GS_LINE_CLASS; - PrimClassField[GS_TRIANGLELIST] = GS_TRIANGLE_CLASS; - PrimClassField[GS_TRIANGLESTRIP] = GS_TRIANGLE_CLASS; - PrimClassField[GS_TRIANGLEFAN] = GS_TRIANGLE_CLASS; - PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS; - PrimClassField[GS_INVALID] = GS_INVALID_CLASS; - - VertexCountField[GS_POINTLIST] = 1; - VertexCountField[GS_LINELIST] = 2; - VertexCountField[GS_LINESTRIP] = 2; - VertexCountField[GS_TRIANGLELIST] = 3; - VertexCountField[GS_TRIANGLESTRIP] = 3; - VertexCountField[GS_TRIANGLEFAN] = 3; - VertexCountField[GS_SPRITE] = 2; - VertexCountField[GS_INVALID] = 1; - - ClassVertexCountField[GS_POINT_CLASS] = 1; - ClassVertexCountField[GS_LINE_CLASS] = 2; - ClassVertexCountField[GS_TRIANGLE_CLASS] = 3; - ClassVertexCountField[GS_SPRITE_CLASS] = 2; - - memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); - - for(int i = 0; i < 64; i++) - { - CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f); - } - - CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); - CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32 >> 5] |= 1 << (PSM_PSMCT32 & 0x1f); - CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S >> 5] |= 1 << (PSM_PSMCT16S & 0x1f); - CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16 >> 5] |= 1 << (PSM_PSMCT16 & 0x1f); - CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); - CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32 >> 5] |= 1 << (PSM_PSMZ32 & 0x1f); - CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S >> 5] |= 1 << (PSM_PSMZ16S & 0x1f); - CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16 >> 5] |= 1 << (PSM_PSMZ16 & 0x1f); - - 
memset(SharedBitsField, 0, sizeof(SharedBitsField)); - - SharedBitsField[PSM_PSMCT24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f); - SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); - SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); - SharedBitsField[PSM_PSMZ24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f); - SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); - SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); - SharedBitsField[PSM_PSMT8H][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); - SharedBitsField[PSM_PSMT8H][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); - SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); - SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); - SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); - SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); - SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); - SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); - } - -} s_maps; - -GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim) -{ - return (GS_PRIM_CLASS)s_maps.PrimClassField[prim]; -} - -int GSUtil::GetVertexCount(uint32 prim) -{ - return s_maps.VertexCountField[prim]; -} - -int GSUtil::GetClassVertexCount(uint32 primclass) -{ - return s_maps.ClassVertexCountField[primclass]; -} - -const uint32* GSUtil::HasSharedBitsPtr(uint32 dpsm) -{ - return s_maps.SharedBitsField[dpsm]; -} - -bool GSUtil::HasSharedBits(uint32 spsm, const uint32* RESTRICT ptr) -{ - return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0; -} - -bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm) -{ - return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0; -} - -bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm) -{ - return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 
<< (spsm & 0x1f)))) == 0; -} - -bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm) -{ - return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0; -} - -bool GSUtil::CheckSSE() -{ - Xbyak::util::Cpu cpu; - Xbyak::util::Cpu::Type type; - - #if _M_SSE >= 0x500 - type = Xbyak::util::Cpu::tAVX; - #elif _M_SSE >= 0x402 - type = Xbyak::util::Cpu::tSSE42; - #elif _M_SSE >= 0x401 - type = Xbyak::util::Cpu::tSSE41; - #elif _M_SSE >= 0x301 - type = Xbyak::util::Cpu::tSSSE3; - #elif _M_SSE >= 0x200 - type = Xbyak::util::Cpu::tSSE2; - #endif - - if(!cpu.has(type)) - { - fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff); - - return false; - } - - return true; -} - -#define OCL_PROGRAM_VERSION 3 - -#ifdef ENABLE_OPENCL -void GSUtil::GetDeviceDescs(list& dl) -{ - dl.clear(); - - try - { - std::vector platforms; - - cl::Platform::get(&platforms); - - for(auto& p : platforms) - { - std::string platform_vendor = p.getInfo(); - - std::vector ds; - - p.getDevices(CL_DEVICE_TYPE_ALL, &ds); - - for(auto& device : ds) - { - string type; - - switch(device.getInfo()) - { - case CL_DEVICE_TYPE_GPU: type = "GPU"; break; - case CL_DEVICE_TYPE_CPU: type = "CPU"; break; - } - - if(type.empty()) continue; - - std::string version = device.getInfo(); - - int major = 0; - int minor = 0; - - if(!type.empty() && sscanf(version.c_str(), "OpenCL C %d.%d", &major, &minor) == 2 && major == 1 && minor >= 1 || major > 1) - { - OCLDeviceDesc desc; - - desc.device = device; - desc.name = GetDeviceUniqueName(device); - desc.version = major * 100 + minor * 10; - - // TODO: linux - - char* buff = new char[MAX_PATH + 1]; - GetTempPath(MAX_PATH, buff); - desc.tmppath = string(buff) + "/" + desc.name; - - WIN32_FIND_DATA FindFileData; - HANDLE hFind = FindFirstFile(desc.tmppath.c_str(), &FindFileData); - if(hFind != INVALID_HANDLE_VALUE) FindClose(hFind); - else CreateDirectory(desc.tmppath.c_str(), NULL); - - sprintf(buff, "/%d", 
OCL_PROGRAM_VERSION); - desc.tmppath += buff; - delete[] buff; - - hFind = FindFirstFile(desc.tmppath.c_str(), &FindFileData); - if(hFind != INVALID_HANDLE_VALUE) FindClose(hFind); - else CreateDirectory(desc.tmppath.c_str(), NULL); - - dl.push_back(desc); - } - } - } - } - catch(cl::Error err) - { - printf("%s (%d)\n", err.what(), err.err()); - } -} - -string GSUtil::GetDeviceUniqueName(cl::Device& device) -{ - std::string vendor = device.getInfo(); - std::string name = device.getInfo(); - std::string version = device.getInfo(); - - string type; - - switch(device.getInfo()) - { - case CL_DEVICE_TYPE_GPU: type = "GPU"; break; - case CL_DEVICE_TYPE_CPU: type = "CPU"; break; - } - - version.erase(version.find_last_not_of(' ') + 1); - - return vendor + " " + name + " " + version + " " + type; -} -#endif - -#ifdef _WIN32 - -bool GSUtil::CheckDirectX() -{ - if (GSDeviceDX::LoadD3DCompiler()) - { - GSDeviceDX::FreeD3DCompiler(); - return true; - } - - // User's system is likely broken if it fails and is Windows 8.1 or greater. - if (!IsWindows8Point1OrGreater()) - { - printf("Cannot find d3dcompiler_43.dll\n"); - if (MessageBox(nullptr, TEXT("You need to update some DirectX libraries, would you like to do it now?"), TEXT("GSdx"), MB_YESNO) == IDYES) - { - ShellExecute(nullptr, TEXT("open"), TEXT("https://www.microsoft.com/en-us/download/details.aspx?id=8109"), nullptr, nullptr, SW_SHOWNORMAL); - } - } - return false; -} - -// --------------------------------------------------------------------------------- -// DX11 Detection (includes DXGI detection and dynamic library method bindings) -// --------------------------------------------------------------------------------- -// Code 'Borrowed' from Microsoft's DXGI sources -- Modified to suit our needs. --air -// Stripped down because of unnecessary complexity and false positives -// e.g. 
(d3d11_beta.dll would fail at device creation time) --pseudonym - -static int s_DXGI; -static int s_D3D11; - -bool GSUtil::CheckDXGI() -{ - if (0 == s_DXGI) - { - HMODULE hmod = LoadLibrary("dxgi.dll"); - s_DXGI = hmod ? 1 : -1; - if (hmod) - FreeLibrary(hmod); - } - - return s_DXGI > 0; -} - -bool GSUtil::CheckD3D11() -{ - if (!CheckDXGI()) - return false; - - if (0 == s_D3D11) - { - HMODULE hmod = LoadLibrary("d3d11.dll"); - s_D3D11 = hmod ? 1 : -1; - if (hmod) - FreeLibrary(hmod); - } - - return s_D3D11 > 0; -} - -D3D_FEATURE_LEVEL GSUtil::CheckDirect3D11Level(IDXGIAdapter *adapter, D3D_DRIVER_TYPE type) -{ - HRESULT hr; - D3D_FEATURE_LEVEL level; - - if(!CheckD3D11()) - return (D3D_FEATURE_LEVEL)0; - - hr = D3D11CreateDevice(adapter, type, NULL, 0, NULL, 0, D3D11_SDK_VERSION, NULL, &level, NULL); - - return SUCCEEDED(hr) ? level : (D3D_FEATURE_LEVEL)0; -} - -#else - -void GSmkdir(const char* dir) -{ - if (mkdir(dir, 0777)) - fprintf(stderr, "Failed to create directory: %s\n", dir); -} - -#endif diff --git a/plugins/GSdx_legacy/GSUtil.h b/plugins/GSdx_legacy/GSUtil.h deleted file mode 100644 index f1372775e3..0000000000 --- a/plugins/GSdx_legacy/GSUtil.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" - -struct OCLDeviceDesc -{ -#ifdef ENABLE_OPENCL - cl::Device device; -#endif - string name; - int version; - string tmppath; -}; - -class GSUtil -{ -public: - static const char* GetLibName(); - - static GS_PRIM_CLASS GetPrimClass(uint32 prim); - static int GetVertexCount(uint32 prim); - static int GetClassVertexCount(uint32 primclass); - - static const uint32* HasSharedBitsPtr(uint32 dpsm); - static bool HasSharedBits(uint32 spsm, const uint32* ptr); - static bool HasSharedBits(uint32 spsm, uint32 dpsm); - static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm); - static bool HasCompatibleBits(uint32 spsm, uint32 dpsm); - - static bool CheckSSE(); - -#ifdef ENABLE_OPENCL - static void GetDeviceDescs(list& dl); - static string GetDeviceUniqueName(cl::Device& device); -#endif - -#ifdef _WIN32 - - static bool CheckDirectX(); - static bool CheckDXGI(); - static bool CheckD3D11(); - static D3D_FEATURE_LEVEL CheckDirect3D11Level(IDXGIAdapter *adapter = NULL, D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE); - -#endif -}; - -#ifdef __linux__ -void GSmkdir(const char* dir); -#endif diff --git a/plugins/GSdx_legacy/GSVector.cpp b/plugins/GSdx_legacy/GSVector.cpp deleted file mode 100644 index d5d074ac3a..0000000000 --- a/plugins/GSdx_legacy/GSVector.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVector.h" - -const GSVector4i GSVector4i::m_xff[17] = -{ - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), -}; - -const GSVector4i GSVector4i::m_x0f[17] = -{ - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - 
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), -}; - -const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f); -const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); -const GSVector4 GSVector4::m_half(0.5f); -const GSVector4 GSVector4::m_one(1.0f); -const GSVector4 GSVector4::m_two(2.0f); -const GSVector4 GSVector4::m_four(4.0f); -const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); -const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000))); -const GSVector4 GSVector4::m_max(FLT_MAX); -const GSVector4 GSVector4::m_min(FLT_MIN); - -#if _M_SSE >= 0x500 - -const GSVector8 GSVector8::m_half(0.5f); -const GSVector8 GSVector8::m_one(1.0f); -const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); -const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); -const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000))); -const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000))); -const GSVector8 GSVector8::m_max(FLT_MAX); -const GSVector8 GSVector8::m_min(FLT_MIN); - -#endif - -#if _M_SSE >= 0x501 - -const GSVector8i 
GSVector8i::m_xff[33] = -{ - GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 
0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), -}; - -const GSVector8i GSVector8i::m_x0f[33] = -{ - GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 
0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), -}; - -#endif - -GSVector4i GSVector4i::fit(int arx, int ary) const -{ - GSVector4i r = *this; - - if(arx > 0 && ary > 0) - { - int w = width(); - int h = height(); - - if(w * ary > h * arx) - { - w = h * arx / ary; - r.left = (r.left + r.right - w) >> 1; - if(r.left & 1) r.left++; - r.right = r.left + w; - } - else - { - h = w * ary / arx; - r.top = (r.top + r.bottom - h) >> 1; - if(r.top & 1) r.top++; - r.bottom = r.top + h; - } - - r = r.rintersect(*this); - } - else - { - r = *this; - } - - return r; -} - -static const int s_ar[][2] = {{0, 0}, {4, 3}, {16, 9}}; - -GSVector4i GSVector4i::fit(int preset) const -{ - GSVector4i r; - - 
if(preset > 0 && preset < (int)countof(s_ar)) - { - r = fit(s_ar[preset][0], s_ar[preset][1]); - } - else - { - r = *this; - } - - return r; -} diff --git a/plugins/GSdx_legacy/GSVector.h b/plugins/GSdx_legacy/GSVector.h deleted file mode 100644 index c07b951317..0000000000 --- a/plugins/GSdx_legacy/GSVector.h +++ /dev/null @@ -1,6048 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" - -#pragma once - -enum Align_Mode -{ - Align_Outside, - Align_Inside, - Align_NegInf, - Align_PosInf -}; - -enum Round_Mode -{ - Round_NearestInt = 8, - Round_NegInf = 9, - Round_PosInf = 10, - Round_Truncate = 11 -}; - -#pragma pack(push, 1) - -template class GSVector2T -{ -public: - union - { - struct {T x, y;}; - struct {T r, g;}; - struct {T v[2];}; - }; - - GSVector2T() - { - } - - GSVector2T(T x, T y) - { - this->x = x; - this->y = y; - } - - bool operator == (const GSVector2T& v) const - { - return x == v.x && y == v.y; - } - - bool operator != (const GSVector2T& v) const - { - return x != v.x || y != v.y; - } -}; - -typedef GSVector2T GSVector2; -typedef GSVector2T GSVector2i; - -class GSVector4; -class GSVector4i; - -#if _M_SSE >= 0x500 - -class GSVector8; - -#endif - -#if _M_SSE >= 0x501 - -class GSVector8i; - -#endif - -__aligned(class, 16) GSVector4i -{ - static const GSVector4i m_xff[17]; - static const GSVector4i m_x0f[17]; - -public: - union - { - struct {int x, y, z, w;}; - struct {int r, g, b, a;}; - struct {int left, top, right, bottom;}; - int v[4]; - float f32[4]; - int8 i8[16]; - int16 i16[8]; - int32 i32[4]; - int64 i64[2]; - uint8 u8[16]; - uint16 u16[8]; - uint32 u32[4]; - uint64 u64[2]; - __m128i m; - }; - - __forceinline GSVector4i() - { - } - - __forceinline GSVector4i(int x, int y, int z, int w) - { - // 4 gprs - - // m = _mm_set_epi32(w, z, y, x); - - // 2 gprs - - GSVector4i xz = load(x).upl32(load(z)); - GSVector4i yw = load(y).upl32(load(w)); - - *this = xz.upl32(yw); - } - - __forceinline GSVector4i(int x, int y) - { - *this = load(x).upl32(load(y)); - } - - __forceinline GSVector4i(short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7) - { - m = _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0); - } - - __forceinline GSVector4i(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, 
char b12, char b13, char b14, char b15) - { - m = _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0); - } - - __forceinline GSVector4i(const GSVector4i& v) - { - m = v.m; - } - - __forceinline explicit GSVector4i(const GSVector2i& v) - { - m = _mm_loadl_epi64((__m128i*)&v); - } - - __forceinline explicit GSVector4i(int i) - { - *this = i; - } - - __forceinline explicit GSVector4i(__m128i m) - { - this->m = m; - } - - __forceinline explicit GSVector4i(const GSVector4& v, bool truncate = true); - - __forceinline static GSVector4i cast(const GSVector4& v); - - #if _M_SSE >= 0x500 - - __forceinline static GSVector4i cast(const GSVector8& v); - - #endif - - #if _M_SSE >= 0x501 - - __forceinline static GSVector4i cast(const GSVector8i& v); - - #endif - - __forceinline void operator = (const GSVector4i& v) - { - m = v.m; - } - - __forceinline void operator = (int i) - { - #if _M_SSE >= 0x501 - - m = _mm_broadcastd_epi32(_mm_cvtsi32_si128(i)); - - #else - - m = _mm_set1_epi32(i); - - #endif - } - - __forceinline void operator = (__m128i m) - { - this->m = m; - } - - __forceinline operator __m128i() const - { - return m; - } - - // rect - - __forceinline int width() const - { - return right - left; - } - - __forceinline int height() const - { - return bottom - top; - } - - __forceinline GSVector4i rsize() const - { - return *this - xyxy(); // same as GSVector4i(0, 0, width(), height()); - } - - __forceinline bool rempty() const - { - return (*this < zwzw()).mask() != 0x00ff; - } - - __forceinline GSVector4i runion(const GSVector4i& a) const - { - int i = (upl64(a) < uph64(a)).mask(); - - if(i == 0xffff) - { - return runion_ordered(a); - } - - if((i & 0x00ff) == 0x00ff) - { - return *this; - } - - if((i & 0xff00) == 0xff00) - { - return a; - } - - return GSVector4i::zero(); - } - - __forceinline GSVector4i runion_ordered(const GSVector4i& a) const - { - #if _M_SSE >= 0x401 - - return min_i32(a).upl64(max_i32(a).srl<8>()); - - #else - - return 
GSVector4i(min(x, a.x), min(y, a.y), max(z, a.z), max(w, a.w)); - - #endif - } - - __forceinline GSVector4i rintersect(const GSVector4i& a) const - { - return sat_i32(a); - } - - template __forceinline GSVector4i ralign(const GSVector2i& a) const - { - // a must be 1 << n - - GSVector4i mask = GSVector4i(a) - GSVector4i(1, 1); - - GSVector4i v; - - switch(mode) - { - case Align_Inside: v = *this + mask; break; - case Align_Outside: v = *this + mask.zwxy(); break; - case Align_NegInf: v = *this; break; - case Align_PosInf: v = *this + mask.zwzw(); break; - default: ASSERT(0); break; - } - - return v.andnot(mask.xyxy()); - } - - GSVector4i fit(int arx, int ary) const; - - GSVector4i fit(int preset) const; - - #ifdef _WIN32 - - __forceinline operator LPCRECT() const - { - return (LPCRECT)this; - } - - __forceinline operator LPRECT() - { - return (LPRECT)this; - } - - #endif - - // - - __forceinline uint32 rgba32() const - { - GSVector4i v = *this; - - v = v.ps32(v); - v = v.pu16(v); - - return (uint32)store(v); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const - { - return max_i8(a).min_i8(b); - } - - __forceinline GSVector4i sat_i8(const GSVector4i& a) const - { - return max_i8(a.xyxy()).min_i8(a.zwzw()); - } - - #endif - - __forceinline GSVector4i sat_i16(const GSVector4i& a, const GSVector4i& b) const - { - return max_i16(a).min_i16(b); - } - - __forceinline GSVector4i sat_i16(const GSVector4i& a) const - { - return max_i16(a.xyxy()).min_i16(a.zwzw()); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const - { - return max_i32(a).min_i32(b); - } - - __forceinline GSVector4i sat_i32(const GSVector4i& a) const - { - return max_i32(a.xyxy()).min_i32(a.zwzw()); - } - - #else - - __forceinline GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const - { - GSVector4i v; - - v.x = min(max(x, a.x), b.x); - v.y = min(max(y, a.y), b.y); - v.z = 
min(max(z, a.z), b.z); - v.w = min(max(w, a.w), b.w); - - return v; - } - - __forceinline GSVector4i sat_i32(const GSVector4i& a) const - { - GSVector4i v; - - v.x = min(max(x, a.x), a.z); - v.y = min(max(y, a.y), a.w); - v.z = min(max(z, a.x), a.z); - v.w = min(max(w, a.y), a.w); - - return v; - } - - #endif - - __forceinline GSVector4i sat_u8(const GSVector4i& a, const GSVector4i& b) const - { - return max_u8(a).min_u8(b); - } - - __forceinline GSVector4i sat_u8(const GSVector4i& a) const - { - return max_u8(a.xyxy()).min_u8(a.zwzw()); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i sat_u16(const GSVector4i& a, const GSVector4i& b) const - { - return max_u16(a).min_u16(b); - } - - __forceinline GSVector4i sat_u16(const GSVector4i& a) const - { - return max_u16(a.xyxy()).min_u16(a.zwzw()); - } - - #endif - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i sat_u32(const GSVector4i& a, const GSVector4i& b) const - { - return max_u32(a).min_u32(b); - } - - __forceinline GSVector4i sat_u32(const GSVector4i& a) const - { - return max_u32(a.xyxy()).min_u32(a.zwzw()); - } - - #endif - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i min_i8(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epi8(m, a)); - } - - __forceinline GSVector4i max_i8(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epi8(m, a)); - } - - #endif - - __forceinline GSVector4i min_i16(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epi16(m, a)); - } - - __forceinline GSVector4i max_i16(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epi16(m, a)); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i min_i32(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epi32(m, a)); - } - - __forceinline GSVector4i max_i32(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epi32(m, a)); - } - - #endif - - __forceinline GSVector4i min_u8(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epu8(m, a)); - } - - __forceinline GSVector4i 
max_u8(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epu8(m, a)); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i min_u16(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epu16(m, a)); - } - - __forceinline GSVector4i max_u16(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epu16(m, a)); - } - - __forceinline GSVector4i min_u32(const GSVector4i& a) const - { - return GSVector4i(_mm_min_epu32(m, a)); - } - - __forceinline GSVector4i max_u32(const GSVector4i& a) const - { - return GSVector4i(_mm_max_epu32(m, a)); - } - - #endif - - __forceinline static int min_i16(int a, int b) - { - return store(load(a).min_i16(load(b))); - } - - __forceinline GSVector4i clamp8() const - { - return pu16().upl8(); - } - - __forceinline GSVector4i blend8(const GSVector4i& a, const GSVector4i& mask) const - { - #if _M_SSE >= 0x401 - - return GSVector4i(_mm_blendv_epi8(m, a, mask)); - - #else - - return GSVector4i(_mm_or_si128(_mm_andnot_si128(mask, m), _mm_and_si128(mask, a))); - - #endif - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i blend16(const GSVector4i& a) const - { - return GSVector4i(_mm_blend_epi16(m, a, mask)); - } - - #endif - - #if _M_SSE >= 0x501 - - template __forceinline GSVector4i blend32(const GSVector4i& v) const - { - return GSVector4i(_mm_blend_epi32(m, v.m, mask)); - } - - #endif - - __forceinline GSVector4i blend(const GSVector4i& a, const GSVector4i& mask) const - { - return GSVector4i(_mm_or_si128(_mm_andnot_si128(mask, m), _mm_and_si128(mask, a))); - } - - __forceinline GSVector4i mix16(const GSVector4i& a) const - { - #if _M_SSE >= 0x401 - - return blend16<0xaa>(a); - - #else - - return blend8(a, GSVector4i::xffff0000()); - - #endif - } - - #if _M_SSE >= 0x301 - - __forceinline GSVector4i shuffle8(const GSVector4i& mask) const - { - return GSVector4i(_mm_shuffle_epi8(m, mask)); - } - - #endif - - __forceinline GSVector4i ps16(const GSVector4i& a) const - { - return GSVector4i(_mm_packs_epi16(m, a)); 
- } - - __forceinline GSVector4i ps16() const - { - return GSVector4i(_mm_packs_epi16(m, m)); - } - - __forceinline GSVector4i pu16(const GSVector4i& a) const - { - return GSVector4i(_mm_packus_epi16(m, a)); - } - - __forceinline GSVector4i pu16() const - { - return GSVector4i(_mm_packus_epi16(m, m)); - } - - __forceinline GSVector4i ps32(const GSVector4i& a) const - { - return GSVector4i(_mm_packs_epi32(m, a)); - } - - __forceinline GSVector4i ps32() const - { - return GSVector4i(_mm_packs_epi32(m, m)); - } - - #if _M_SSE >= 0x401 - - __forceinline GSVector4i pu32(const GSVector4i& a) const - { - return GSVector4i(_mm_packus_epi32(m, a)); - } - - __forceinline GSVector4i pu32() const - { - return GSVector4i(_mm_packus_epi32(m, m)); - } - - #endif - - __forceinline GSVector4i upl8(const GSVector4i& a) const - { - return GSVector4i(_mm_unpacklo_epi8(m, a)); - } - - __forceinline GSVector4i uph8(const GSVector4i& a) const - { - return GSVector4i(_mm_unpackhi_epi8(m, a)); - } - - __forceinline GSVector4i upl16(const GSVector4i& a) const - { - return GSVector4i(_mm_unpacklo_epi16(m, a)); - } - - __forceinline GSVector4i uph16(const GSVector4i& a) const - { - return GSVector4i(_mm_unpackhi_epi16(m, a)); - } - - __forceinline GSVector4i upl32(const GSVector4i& a) const - { - return GSVector4i(_mm_unpacklo_epi32(m, a)); - } - - __forceinline GSVector4i uph32(const GSVector4i& a) const - { - return GSVector4i(_mm_unpackhi_epi32(m, a)); - } - - __forceinline GSVector4i upl64(const GSVector4i& a) const - { - return GSVector4i(_mm_unpacklo_epi64(m, a)); - } - - __forceinline GSVector4i uph64(const GSVector4i& a) const - { - return GSVector4i(_mm_unpackhi_epi64(m, a)); - } - - __forceinline GSVector4i upl8() const - { - #if 0 // _M_SSE >= 0x401 // TODO: compiler bug - - return GSVector4i(_mm_cvtepu8_epi16(m)); - - #else - - return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128())); - - #endif - } - - __forceinline GSVector4i uph8() const - { - return 
GSVector4i(_mm_unpackhi_epi8(m, _mm_setzero_si128())); - } - - __forceinline GSVector4i upl16() const - { - #if 0 //_M_SSE >= 0x401 // TODO: compiler bug - - return GSVector4i(_mm_cvtepu16_epi32(m)); - - #else - - return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128())); - - #endif - } - - __forceinline GSVector4i uph16() const - { - return GSVector4i(_mm_unpackhi_epi16(m, _mm_setzero_si128())); - } - - __forceinline GSVector4i upl32() const - { - #if 0 //_M_SSE >= 0x401 // TODO: compiler bug - - return GSVector4i(_mm_cvtepu32_epi64(m)); - - #else - - return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128())); - - #endif - } - - __forceinline GSVector4i uph32() const - { - return GSVector4i(_mm_unpackhi_epi32(m, _mm_setzero_si128())); - } - - __forceinline GSVector4i upl64() const - { - return GSVector4i(_mm_unpacklo_epi64(m, _mm_setzero_si128())); - } - - __forceinline GSVector4i uph64() const - { - return GSVector4i(_mm_unpackhi_epi64(m, _mm_setzero_si128())); - } - - #if _M_SSE >= 0x401 - - // WARNING!!! - // - // MSVC (2008, 2010 ctp) believes that there is a "mem, reg" form of the pmovz/sx* instructions, - // turning these intrinsics into a minefield, don't spill regs when using them... 
- - __forceinline GSVector4i i8to16() const - { - return GSVector4i(_mm_cvtepi8_epi16(m)); - } - - __forceinline GSVector4i u8to16() const - { - return GSVector4i(_mm_cvtepu8_epi16(m)); - } - - __forceinline GSVector4i i8to32() const - { - return GSVector4i(_mm_cvtepi8_epi32(m)); - } - - __forceinline GSVector4i u8to32() const - { - return GSVector4i(_mm_cvtepu8_epi32(m)); - } - - __forceinline GSVector4i i8to64() const - { - return GSVector4i(_mm_cvtepi8_epi64(m)); - } - - __forceinline GSVector4i u8to64() const - { - return GSVector4i(_mm_cvtepu16_epi64(m)); - } - - __forceinline GSVector4i i16to32() const - { - return GSVector4i(_mm_cvtepi16_epi32(m)); - } - - __forceinline GSVector4i u16to32() const - { - return GSVector4i(_mm_cvtepu16_epi32(m)); - } - - __forceinline GSVector4i i16to64() const - { - return GSVector4i(_mm_cvtepi16_epi64(m)); - } - - __forceinline GSVector4i u16to64() const - { - return GSVector4i(_mm_cvtepu16_epi64(m)); - } - - __forceinline GSVector4i i32to64() const - { - return GSVector4i(_mm_cvtepi32_epi64(m)); - } - - __forceinline GSVector4i u32to64() const - { - return GSVector4i(_mm_cvtepu32_epi64(m)); - } - - #else - - __forceinline GSVector4i u8to16() const - { - return upl8(); - } - - __forceinline GSVector4i u8to32() const - { - return upl8().upl16(); - } - - __forceinline GSVector4i u8to64() const - { - return upl8().upl16().upl32(); - } - - __forceinline GSVector4i u16to32() const - { - return upl16(); - } - - __forceinline GSVector4i u16to64() const - { - return upl16().upl32(); - } - - __forceinline GSVector4i u32to64() const - { - return upl32(); - } - - __forceinline GSVector4i i8to16() const - { - return zero().upl8(*this).sra16(8); - } - - __forceinline GSVector4i i16to32() const - { - return zero().upl16(*this).sra32(16); - } - - #endif - - template __forceinline GSVector4i srl() const - { - return GSVector4i(_mm_srli_si128(m, i)); - } - - template __forceinline GSVector4i srl(const GSVector4i& v) - { - #if _M_SSE >= 0x301 
- - return GSVector4i(_mm_alignr_epi8(v.m, m, i)); - - #else - - if(i == 0) return *this; - else if(i < 16) return srl() | v.sll<16 - i>(); - else if(i == 16) return v; - else if(i < 32) return v.srl(); - else return zero(); - - #endif - } - - template __forceinline GSVector4i sll() const - { - return GSVector4i(_mm_slli_si128(m, i)); - } - - __forceinline GSVector4i sra16(int i) const - { - return GSVector4i(_mm_srai_epi16(m, i)); - } - - __forceinline GSVector4i sra16(__m128i i) const - { - return GSVector4i(_mm_sra_epi16(m, i)); - } - - __forceinline GSVector4i sra32(int i) const - { - return GSVector4i(_mm_srai_epi32(m, i)); - } - - __forceinline GSVector4i sra32(__m128i i) const - { - return GSVector4i(_mm_sra_epi32(m, i)); - } - - __forceinline GSVector4i sll16(int i) const - { - return GSVector4i(_mm_slli_epi16(m, i)); - } - - __forceinline GSVector4i sll16(__m128i i) const - { - return GSVector4i(_mm_sll_epi16(m, i)); - } - - __forceinline GSVector4i sll32(int i) const - { - return GSVector4i(_mm_slli_epi32(m, i)); - } - - __forceinline GSVector4i sll32(__m128i i) const - { - return GSVector4i(_mm_sll_epi32(m, i)); - } - - __forceinline GSVector4i sll64(int i) const - { - return GSVector4i(_mm_slli_epi64(m, i)); - } - - __forceinline GSVector4i sll64(__m128i i) const - { - return GSVector4i(_mm_sll_epi64(m, i)); - } - - __forceinline GSVector4i srl16(int i) const - { - return GSVector4i(_mm_srli_epi16(m, i)); - } - - __forceinline GSVector4i srl16(__m128i i) const - { - return GSVector4i(_mm_srl_epi16(m, i)); - } - - __forceinline GSVector4i srl32(int i) const - { - return GSVector4i(_mm_srli_epi32(m, i)); - } - - __forceinline GSVector4i srl32(__m128i i) const - { - return GSVector4i(_mm_srl_epi32(m, i)); - } - - __forceinline GSVector4i srl64(int i) const - { - return GSVector4i(_mm_srli_epi64(m, i)); - } - - __forceinline GSVector4i srl64(__m128i i) const - { - return GSVector4i(_mm_srl_epi64(m, i)); - } - - __forceinline GSVector4i add8(const 
GSVector4i& v) const - { - return GSVector4i(_mm_add_epi8(m, v.m)); - } - - __forceinline GSVector4i add16(const GSVector4i& v) const - { - return GSVector4i(_mm_add_epi16(m, v.m)); - } - - __forceinline GSVector4i add32(const GSVector4i& v) const - { - return GSVector4i(_mm_add_epi32(m, v.m)); - } - - __forceinline GSVector4i adds8(const GSVector4i& v) const - { - return GSVector4i(_mm_adds_epi8(m, v.m)); - } - - __forceinline GSVector4i adds16(const GSVector4i& v) const - { - return GSVector4i(_mm_adds_epi16(m, v.m)); - } - - __forceinline GSVector4i addus8(const GSVector4i& v) const - { - return GSVector4i(_mm_adds_epu8(m, v.m)); - } - - __forceinline GSVector4i addus16(const GSVector4i& v) const - { - return GSVector4i(_mm_adds_epu16(m, v.m)); - } - - __forceinline GSVector4i sub8(const GSVector4i& v) const - { - return GSVector4i(_mm_sub_epi8(m, v.m)); - } - - __forceinline GSVector4i sub16(const GSVector4i& v) const - { - return GSVector4i(_mm_sub_epi16(m, v.m)); - } - - __forceinline GSVector4i sub32(const GSVector4i& v) const - { - return GSVector4i(_mm_sub_epi32(m, v.m)); - } - - __forceinline GSVector4i subs8(const GSVector4i& v) const - { - return GSVector4i(_mm_subs_epi8(m, v.m)); - } - - __forceinline GSVector4i subs16(const GSVector4i& v) const - { - return GSVector4i(_mm_subs_epi16(m, v.m)); - } - - __forceinline GSVector4i subus8(const GSVector4i& v) const - { - return GSVector4i(_mm_subs_epu8(m, v.m)); - } - - __forceinline GSVector4i subus16(const GSVector4i& v) const - { - return GSVector4i(_mm_subs_epu16(m, v.m)); - } - - __forceinline GSVector4i avg8(const GSVector4i& v) const - { - return GSVector4i(_mm_avg_epu8(m, v.m)); - } - - __forceinline GSVector4i avg16(const GSVector4i& v) const - { - return GSVector4i(_mm_avg_epu16(m, v.m)); - } - - __forceinline GSVector4i mul16hs(const GSVector4i& v) const - { - return GSVector4i(_mm_mulhi_epi16(m, v.m)); - } - - __forceinline GSVector4i mul16hu(const GSVector4i& v) const - { - return 
GSVector4i(_mm_mulhi_epu16(m, v.m)); - } - - __forceinline GSVector4i mul16l(const GSVector4i& v) const - { - return GSVector4i(_mm_mullo_epi16(m, v.m)); - } - - #if _M_SSE >= 0x301 - - __forceinline GSVector4i mul16hrs(const GSVector4i& v) const - { - return GSVector4i(_mm_mulhrs_epi16(m, v.m)); - } - - #endif - - GSVector4i madd(const GSVector4i& v) const - { - return GSVector4i(_mm_madd_epi16(m, v.m)); - } - - template __forceinline GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const - { - // (a - this) * f << shift + this - - return add16(a.sub16(*this).modulate16(f)); - } - - template __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c) - { - // (a - b) * c << shift - - return a.sub16(b).modulate16(c); - } - - template __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d) - { - // (a - b) * c << shift + d - - return d.add16(a.sub16(b).modulate16(c)); - } - - __forceinline GSVector4i lerp16_4(const GSVector4i& a, const GSVector4i& f) const - { - // (a - this) * f >> 4 + this (a, this: 8-bit, f: 4-bit) - - return add16(a.sub16(*this).mul16l(f).sra16(4)); - } - - template __forceinline GSVector4i modulate16(const GSVector4i& f) const - { - // a * f << shift - - #if _M_SSE >= 0x301 - - if(shift == 0) - { - return mul16hrs(f); - } - - #endif - - return sll16(shift + 1).mul16hs(f); - } - - __forceinline bool eq(const GSVector4i& v) const - { - #if _M_SSE >= 0x401 - - // pxor, ptest, je - - GSVector4i t = *this ^ v; - - return _mm_testz_si128(t, t) != 0; - - #else - - // pcmpeqd, pmovmskb, cmp, je - - return eq32(v).alltrue(); - - #endif - } - - __forceinline GSVector4i eq8(const GSVector4i& v) const - { - return GSVector4i(_mm_cmpeq_epi8(m, v.m)); - } - - __forceinline GSVector4i eq16(const GSVector4i& v) const - { - return GSVector4i(_mm_cmpeq_epi16(m, v.m)); - } - - __forceinline GSVector4i eq32(const GSVector4i& v) const - { - return 
GSVector4i(_mm_cmpeq_epi32(m, v.m)); - } - - __forceinline GSVector4i neq8(const GSVector4i& v) const - { - return ~eq8(v); - } - - __forceinline GSVector4i neq16(const GSVector4i& v) const - { - return ~eq16(v); - } - - __forceinline GSVector4i neq32(const GSVector4i& v) const - { - return ~eq32(v); - } - - __forceinline GSVector4i gt8(const GSVector4i& v) const - { - return GSVector4i(_mm_cmpgt_epi8(m, v.m)); - } - - __forceinline GSVector4i gt16(const GSVector4i& v) const - { - return GSVector4i(_mm_cmpgt_epi16(m, v.m)); - } - - __forceinline GSVector4i gt32(const GSVector4i& v) const - { - return GSVector4i(_mm_cmpgt_epi32(m, v.m)); - } - - __forceinline GSVector4i lt8(const GSVector4i& v) const - { - return GSVector4i(_mm_cmplt_epi8(m, v.m)); - } - - __forceinline GSVector4i lt16(const GSVector4i& v) const - { - return GSVector4i(_mm_cmplt_epi16(m, v.m)); - } - - __forceinline GSVector4i lt32(const GSVector4i& v) const - { - return GSVector4i(_mm_cmplt_epi32(m, v.m)); - } - - __forceinline GSVector4i andnot(const GSVector4i& v) const - { - return GSVector4i(_mm_andnot_si128(v.m, m)); - } - - __forceinline int mask() const - { - return _mm_movemask_epi8(m); - } - - __forceinline bool alltrue() const - { - return mask() == 0xffff; - } - - __forceinline bool allfalse() const - { - #if _M_SSE >= 0x401 - - return _mm_testz_si128(m, m) != 0; - - #else - - return mask() == 0; - - #endif - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i insert8(int a) const - { - return GSVector4i(_mm_insert_epi8(m, a, i)); - } - - #endif - - template __forceinline int extract8() const - { - #if _M_SSE >= 0x401 - - return _mm_extract_epi8(m, i); - - #else - - return (int)u8[i]; - - #endif - } - - template __forceinline GSVector4i insert16(int a) const - { - return GSVector4i(_mm_insert_epi16(m, a, i)); - } - - template __forceinline int extract16() const - { - return _mm_extract_epi16(m, i); - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i insert32(int 
a) const - { - return GSVector4i(_mm_insert_epi32(m, a, i)); - } - - #endif - - template __forceinline int extract32() const - { - if(i == 0) return GSVector4i::store(*this); - - #if _M_SSE >= 0x401 - - return _mm_extract_epi32(m, i); - - #else - - return i32[i]; - - #endif - } - - #ifdef _M_AMD64 - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i insert64(int64 a) const - { - return GSVector4i(_mm_insert_epi64(m, a, i)); - } - - #endif - - template __forceinline int64 extract64() const - { - if(i == 0) return GSVector4i::storeq(*this); - - #if _M_SSE >= 0x401 - - return _mm_extract_epi64(m, i); - - #else - - return i64[i]; - - #endif - } - - #endif - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i gather8_4(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8() & 0xf]); - v = v.insert8<1>((int)ptr[extract8() >> 4]); - v = v.insert8<2>((int)ptr[extract8() & 0xf]); - v = v.insert8<3>((int)ptr[extract8() >> 4]); - v = v.insert8<4>((int)ptr[extract8() & 0xf]); - v = v.insert8<5>((int)ptr[extract8() >> 4]); - v = v.insert8<6>((int)ptr[extract8() & 0xf]); - v = v.insert8<7>((int)ptr[extract8() >> 4]); - v = v.insert8<8>((int)ptr[extract8() & 0xf]); - v = v.insert8<9>((int)ptr[extract8() >> 4]); - v = v.insert8<10>((int)ptr[extract8() & 0xf]); - v = v.insert8<11>((int)ptr[extract8() >> 4]); - v = v.insert8<12>((int)ptr[extract8() & 0xf]); - v = v.insert8<13>((int)ptr[extract8() >> 4]); - v = v.insert8<14>((int)ptr[extract8() & 0xf]); - v = v.insert8<15>((int)ptr[extract8() >> 4]); - - return v; - } - - template __forceinline GSVector4i gather8_8(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8<0>()]); - v = v.insert8<1>((int)ptr[extract8<1>()]); - v = v.insert8<2>((int)ptr[extract8<2>()]); - v = v.insert8<3>((int)ptr[extract8<3>()]); - v = v.insert8<4>((int)ptr[extract8<4>()]); - v = v.insert8<5>((int)ptr[extract8<5>()]); - v = v.insert8<6>((int)ptr[extract8<6>()]); - v = 
v.insert8<7>((int)ptr[extract8<7>()]); - v = v.insert8<8>((int)ptr[extract8<8>()]); - v = v.insert8<9>((int)ptr[extract8<9>()]); - v = v.insert8<10>((int)ptr[extract8<10>()]); - v = v.insert8<11>((int)ptr[extract8<11>()]); - v = v.insert8<12>((int)ptr[extract8<12>()]); - v = v.insert8<13>((int)ptr[extract8<13>()]); - v = v.insert8<14>((int)ptr[extract8<14>()]); - v = v.insert8<15>((int)ptr[extract8<15>()]); - - return v; - } - - template __forceinline GSVector4i gather8_16(const T* ptr, const GSVector4i& a) const - { - GSVector4i v = a; - - v = v.insert8((int)ptr[extract16<0>()]); - v = v.insert8((int)ptr[extract16<1>()]); - v = v.insert8((int)ptr[extract16<2>()]); - v = v.insert8((int)ptr[extract16<3>()]); - v = v.insert8((int)ptr[extract16<4>()]); - v = v.insert8((int)ptr[extract16<5>()]); - v = v.insert8((int)ptr[extract16<6>()]); - v = v.insert8((int)ptr[extract16<7>()]); - - return v; - } - - template __forceinline GSVector4i gather8_32(const T* ptr, const GSVector4i& a) const - { - GSVector4i v = a; - - v = v.insert8((int)ptr[extract32<0>()]); - v = v.insert8((int)ptr[extract32<1>()]); - v = v.insert8((int)ptr[extract32<2>()]); - v = v.insert8((int)ptr[extract32<3>()]); - - return v; - } - - #endif - - template __forceinline GSVector4i gather16_4(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8() & 0xf]); - v = v.insert16<1>((int)ptr[extract8() >> 4]); - v = v.insert16<2>((int)ptr[extract8() & 0xf]); - v = v.insert16<3>((int)ptr[extract8() >> 4]); - v = v.insert16<4>((int)ptr[extract8() & 0xf]); - v = v.insert16<5>((int)ptr[extract8() >> 4]); - v = v.insert16<6>((int)ptr[extract8() & 0xf]); - v = v.insert16<7>((int)ptr[extract8() >> 4]); - - return v; - } - - template __forceinline GSVector4i gather16_8(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8()]); - v = v.insert16<1>((int)ptr[extract8()]); - v = v.insert16<2>((int)ptr[extract8()]); - v = v.insert16<3>((int)ptr[extract8()]); - v = 
v.insert16<4>((int)ptr[extract8()]); - v = v.insert16<5>((int)ptr[extract8()]); - v = v.insert16<6>((int)ptr[extract8()]); - v = v.insert16<7>((int)ptr[extract8()]); - - return v; - } - - template__forceinline GSVector4i gather16_16(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract16<0>()]); - v = v.insert16<1>((int)ptr[extract16<1>()]); - v = v.insert16<2>((int)ptr[extract16<2>()]); - v = v.insert16<3>((int)ptr[extract16<3>()]); - v = v.insert16<4>((int)ptr[extract16<4>()]); - v = v.insert16<5>((int)ptr[extract16<5>()]); - v = v.insert16<6>((int)ptr[extract16<6>()]); - v = v.insert16<7>((int)ptr[extract16<7>()]); - - return v; - } - - template__forceinline GSVector4i gather16_16(const T1* ptr1, const T2* ptr2) const - { - GSVector4i v; - - v = load((int)ptr2[ptr1[extract16<0>()]]); - v = v.insert16<1>((int)ptr2[ptr1[extract16<1>()]]); - v = v.insert16<2>((int)ptr2[ptr1[extract16<2>()]]); - v = v.insert16<3>((int)ptr2[ptr1[extract16<3>()]]); - v = v.insert16<4>((int)ptr2[ptr1[extract16<4>()]]); - v = v.insert16<5>((int)ptr2[ptr1[extract16<5>()]]); - v = v.insert16<6>((int)ptr2[ptr1[extract16<6>()]]); - v = v.insert16<7>((int)ptr2[ptr1[extract16<7>()]]); - - return v; - } - - template __forceinline GSVector4i gather16_32(const T* ptr, const GSVector4i& a) const - { - GSVector4i v = a; - - v = v.insert16((int)ptr[extract32<0>()]); - v = v.insert16((int)ptr[extract32<1>()]); - v = v.insert16((int)ptr[extract32<2>()]); - v = v.insert16((int)ptr[extract32<3>()]); - - return v; - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4i gather32_4(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8() & 0xf]); - v = v.insert32<1>((int)ptr[extract8() >> 4]); - v = v.insert32<2>((int)ptr[extract8() & 0xf]); - v = v.insert32<3>((int)ptr[extract8() >> 4]); - return v; - } - - template __forceinline GSVector4i gather32_8(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract8()]); - v = 
v.insert32<1>((int)ptr[extract8()]); - v = v.insert32<2>((int)ptr[extract8()]); - v = v.insert32<3>((int)ptr[extract8()]); - - return v; - } - - template __forceinline GSVector4i gather32_16(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract16()]); - v = v.insert32<1>((int)ptr[extract16()]); - v = v.insert32<2>((int)ptr[extract16()]); - v = v.insert32<3>((int)ptr[extract16()]); - - return v; - } - - template __forceinline GSVector4i gather32_32(const T* ptr) const - { - GSVector4i v; - - v = load((int)ptr[extract32<0>()]); - v = v.insert32<1>((int)ptr[extract32<1>()]); - v = v.insert32<2>((int)ptr[extract32<2>()]); - v = v.insert32<3>((int)ptr[extract32<3>()]); - - return v; - } - - template __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const - { - GSVector4i v; - - v = load((int)ptr2[ptr1[extract32<0>()]]); - v = v.insert32<1>((int)ptr2[ptr1[extract32<1>()]]); - v = v.insert32<2>((int)ptr2[ptr1[extract32<2>()]]); - v = v.insert32<3>((int)ptr2[ptr1[extract32<3>()]]); - - return v; - } - - #else - - template __forceinline GSVector4i gather32_4(const T* ptr) const - { - return GSVector4i( - (int)ptr[extract8() & 0xf], - (int)ptr[extract8() >> 4], - (int)ptr[extract8() & 0xf], - (int)ptr[extract8() >> 4]); - } - - template __forceinline GSVector4i gather32_8(const T* ptr) const - { - return GSVector4i( - (int)ptr[extract8()], - (int)ptr[extract8()], - (int)ptr[extract8()], - (int)ptr[extract8()]); - } - - template __forceinline GSVector4i gather32_16(const T* ptr) const - { - return GSVector4i( - (int)ptr[extract16()], - (int)ptr[extract16()], - (int)ptr[extract16()], - (int)ptr[extract16()]); - } - - template __forceinline GSVector4i gather32_32(const T* ptr) const - { - return GSVector4i( - (int)ptr[extract32<0>()], - (int)ptr[extract32<1>()], - (int)ptr[extract32<2>()], - (int)ptr[extract32<3>()]); - } - - template __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const - { - return GSVector4i( - 
(int)ptr2[ptr1[extract32<0>()]], - (int)ptr2[ptr1[extract32<1>()]], - (int)ptr2[ptr1[extract32<2>()]], - (int)ptr2[ptr1[extract32<3>()]]); - } - - #endif - - #if defined(_M_AMD64) && _M_SSE >= 0x401 - - template __forceinline GSVector4i gather64_4(const T* ptr) const - { - GSVector4i v; - - v = loadq((int64)ptr[extract8() & 0xf]); - v = v.insert64<1>((int64)ptr[extract8() >> 4]); - - return v; - } - - template __forceinline GSVector4i gather64_8(const T* ptr) const - { - GSVector4i v; - - v = loadq((int64)ptr[extract8()]); - v = v.insert64<1>((int64)ptr[extract8()]); - - return v; - } - - template __forceinline GSVector4i gather64_16(const T* ptr) const - { - GSVector4i v; - - v = loadq((int64)ptr[extract16()]); - v = v.insert64<1>((int64)ptr[extract16()]); - - return v; - } - - template __forceinline GSVector4i gather64_32(const T* ptr) const - { - GSVector4i v; - - v = loadq((int64)ptr[extract32()]); - v = v.insert64<1>((int64)ptr[extract32()]); - - return v; - } - - template __forceinline GSVector4i gather64_64(const T* ptr) const - { - GSVector4i v; - - v = loadq((int64)ptr[extract64<0>()]); - v = v.insert64<1>((int64)ptr[extract64<1>()]); - - return v; - } - - #else - - template __forceinline GSVector4i gather64_4(const T* ptr) const - { - GSVector4i v; - - v = loadu(&ptr[extract8() & 0xf], &ptr[extract8() >> 4]); - - return v; - } - - template __forceinline GSVector4i gather64_8(const T* ptr) const - { - GSVector4i v; - - v = load(&ptr[extract8()], &ptr[extract8()]); - - return v; - } - - template __forceinline GSVector4i gather64_16(const T* ptr) const - { - GSVector4i v; - - v = load(&ptr[extract16()], &ptr[extract16()]); - - return v; - } - - template __forceinline GSVector4i gather64_32(const T* ptr) const - { - GSVector4i v; - - v = load(&ptr[extract32()], &ptr[extract32()]); - - return v; - } - - #endif - - #if _M_SSE >= 0x401 - - template __forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = 
gather8_4<0>(ptr); - dst[1] = gather8_4<8>(ptr); - } - - __forceinline void gather8_8(const uint8* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather8_8<>(ptr); - } - - #endif - - template __forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather16_4<0>(ptr); - dst[1] = gather16_4<4>(ptr); - dst[2] = gather16_4<8>(ptr); - dst[3] = gather16_4<12>(ptr); - } - - template __forceinline void gather16_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather16_8<0>(ptr); - dst[1] = gather16_8<8>(ptr); - } - - template __forceinline void gather16_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather16_16<>(ptr); - } - - template __forceinline void gather32_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather32_4<0>(ptr); - dst[1] = gather32_4<2>(ptr); - dst[2] = gather32_4<4>(ptr); - dst[3] = gather32_4<6>(ptr); - dst[4] = gather32_4<8>(ptr); - dst[5] = gather32_4<10>(ptr); - dst[6] = gather32_4<12>(ptr); - dst[7] = gather32_4<14>(ptr); - } - - template __forceinline void gather32_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather32_8<0>(ptr); - dst[1] = gather32_8<4>(ptr); - dst[2] = gather32_8<8>(ptr); - dst[3] = gather32_8<12>(ptr); - } - - template __forceinline void gather32_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather32_16<0>(ptr); - dst[1] = gather32_16<4>(ptr); - } - - template __forceinline void gather32_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather32_32<>(ptr); - } - - template __forceinline void gather64_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather64_4<0>(ptr); - dst[1] = gather64_4<1>(ptr); - dst[2] = gather64_4<2>(ptr); - dst[3] = gather64_4<3>(ptr); - dst[4] = gather64_4<4>(ptr); - dst[5] = gather64_4<5>(ptr); - dst[6] = gather64_4<6>(ptr); - dst[7] = gather64_4<7>(ptr); - dst[8] = 
gather64_4<8>(ptr); - dst[9] = gather64_4<9>(ptr); - dst[10] = gather64_4<10>(ptr); - dst[11] = gather64_4<11>(ptr); - dst[12] = gather64_4<12>(ptr); - dst[13] = gather64_4<13>(ptr); - dst[14] = gather64_4<14>(ptr); - dst[15] = gather64_4<15>(ptr); - } - - template __forceinline void gather64_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather64_8<0>(ptr); - dst[1] = gather64_8<2>(ptr); - dst[2] = gather64_8<4>(ptr); - dst[3] = gather64_8<6>(ptr); - dst[4] = gather64_8<8>(ptr); - dst[5] = gather64_8<10>(ptr); - dst[6] = gather64_8<12>(ptr); - dst[7] = gather64_8<14>(ptr); - } - - /* Gathers 64-bit elements from ptr using this vector's 16-bit lanes as indices and writes four result vectors to dst[0..3]. A 128-bit vector has eight 16-bit lanes (0..7) and each single-vector gather64_16 call reads two indices, so the start lanes are 0, 2, 4, 6. The last call previously used 8, which is past the final lane. NOTE(review): template argument lists are not visible in this view; the i / i + 1 lane pairing is inferred from the sibling gather64_8 and gather64_32 sequences - confirm. */ - template __forceinline void gather64_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather64_16<0>(ptr); - dst[1] = gather64_16<2>(ptr); - dst[2] = gather64_16<4>(ptr); - dst[3] = gather64_16<6>(ptr); - } - - template __forceinline void gather64_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather64_32<0>(ptr); - dst[1] = gather64_32<2>(ptr); - } - - #ifdef _M_AMD64 - - template __forceinline void gather64_64(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const - { - dst[0] = gather64_64<>(ptr); - } - - #endif - - __forceinline static GSVector4i loadnt(const void* p) - { - #if _M_SSE >= 0x401 - - return GSVector4i(_mm_stream_load_si128((__m128i*)p)); - - #else - - return GSVector4i(_mm_load_si128((__m128i*)p)); - - #endif - } - - __forceinline static GSVector4i loadl(const void* p) - { - return GSVector4i(_mm_loadl_epi64((__m128i*)p)); - } - - __forceinline static GSVector4i loadh(const void* p) - { - return GSVector4i(_mm_castps_si128(_mm_loadh_pi(_mm_setzero_ps(), (__m64*)p))); - } - - __forceinline static GSVector4i loadh(const void* p, const GSVector4i& v) - { - return GSVector4i(_mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(v.m), (__m64*)p))); - } - - __forceinline static GSVector4i load(const void* pl, const void* ph) - { - return loadh(ph, loadl(pl)); - } -/* - __forceinline static GSVector4i 
load(const void* pl, const void* ph) - { - __m128i lo = _mm_loadl_epi64((__m128i*)pl); - __m128i hi = _mm_loadl_epi64((__m128i*)ph); - - return GSVector4i(_mm_unpacklo_epi64(lo, hi)); - } -*/ - template __forceinline static GSVector4i load(const void* p) - { - return GSVector4i(aligned ? _mm_load_si128((__m128i*)p) : _mm_loadu_si128((__m128i*)p)); - } - - __forceinline static GSVector4i load(int i) - { - return GSVector4i(_mm_cvtsi32_si128(i)); - } - - #ifdef _M_AMD64 - - __forceinline static GSVector4i loadq(int64 i) - { - return GSVector4i(_mm_cvtsi64_si128(i)); - } - - #endif - - __forceinline static void storent(void* p, const GSVector4i& v) - { - _mm_stream_si128((__m128i*)p, v.m); - } - - __forceinline static void storel(void* p, const GSVector4i& v) - { - _mm_storel_epi64((__m128i*)p, v.m); - } - - __forceinline static void storeh(void* p, const GSVector4i& v) - { - _mm_storeh_pi((__m64*)p, _mm_castsi128_ps(v.m)); - } - - __forceinline static void store(void* pl, void* ph, const GSVector4i& v) - { - GSVector4i::storel(pl, v); - GSVector4i::storeh(ph, v); - } - - template __forceinline static void store(void* p, const GSVector4i& v) - { - if(aligned) _mm_store_si128((__m128i*)p, v.m); - else _mm_storeu_si128((__m128i*)p, v.m); - } - - __forceinline static int store(const GSVector4i& v) - { - return _mm_cvtsi128_si32(v.m); - } - - #ifdef _M_AMD64 - - __forceinline static int64 storeq(const GSVector4i& v) - { - return _mm_cvtsi128_si64(v.m); - } - - #endif - - __forceinline static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size) - { - const GSVector4i* s = (const GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - if(size == 0) return; - - size_t i = 0; - size_t j = size >> 6; - - for(; i < j; i++, s += 4, d += 4) - { - storent(&d[0], s[0]); - storent(&d[1], s[1]); - storent(&d[2], s[2]); - storent(&d[3], s[3]); - } - - size &= 63; - - if(size == 0) return; - - memcpy(d, s, size); - } - - __forceinline static void 
transpose(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - _MM_TRANSPOSE4_SI128(a.m, b.m, c.m, d.m); - } - - __forceinline static void sw4(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - const __m128i epi32_0f0f0f0f = _mm_set1_epi32(0x0f0f0f0f); - - GSVector4i mask(epi32_0f0f0f0f); - - GSVector4i e = (b << 4).blend(a, mask); - GSVector4i f = b.blend(a >> 4, mask); - GSVector4i g = (d << 4).blend(c, mask); - GSVector4i h = d.blend(c >> 4, mask); - - a = e.upl8(f); - c = e.uph8(f); - b = g.upl8(h); - d = g.uph8(h); - } - - __forceinline static void sw8(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = e.upl8(b); - c = e.uph8(b); - b = f.upl8(d); - d = f.uph8(d); - } - - __forceinline static void sw16(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = e.upl16(b); - c = e.uph16(b); - b = f.upl16(d); - d = f.uph16(d); - } - - __forceinline static void sw16rl(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = b.upl16(e); - c = e.uph16(b); - b = d.upl16(f); - d = f.uph16(d); - } - - __forceinline static void sw16rh(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = e.upl16(b); - c = b.uph16(e); - b = f.upl16(d); - d = d.uph16(f); - } - - __forceinline static void sw32(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = e.upl32(b); - c = e.uph32(b); - b = f.upl32(d); - d = f.uph32(d); - } - - __forceinline static void sw64(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) - { - GSVector4i e = a; - GSVector4i f = c; - - a = e.upl64(b); - c = e.uph64(b); - b = f.upl64(d); - d = f.uph64(d); - } - - __forceinline static bool compare16(const void* dst, const void* src, size_t size) - { - ASSERT((size & 15) == 0); - - size >>= 4; - - 
GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - for(size_t i = 0; i < size; i++) - { - if(!d[i].eq(s[i])) - { - return false; - } - } - - return true; - } - - /* Returns true when the size bytes at dst and src are identical; size must be a multiple of 64 (asserted). After the shift, size counts 64-byte groups and i indexes one group, i.e. the four 128-bit vectors d[i * 4 + 0..3]. The index must therefore advance by one group per iteration: the former step of four examined only every fourth group, so differing buffers could compare equal. */ - __forceinline static bool compare64(const void* dst, const void* src, size_t size) - { - ASSERT((size & 63) == 0); - - size >>= 6; - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - for(size_t i = 0; i < size; i++) - { - GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]); - GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]); - GSVector4i v2 = (d[i * 4 + 2] == s[i * 4 + 2]); - GSVector4i v3 = (d[i * 4 + 3] == s[i * 4 + 3]); - - v0 = v0 & v1; - v2 = v2 & v3; - - if(!(v0 & v2).alltrue()) - { - return false; - } - } - - return true; - } - - __forceinline static bool update(const void* dst, const void* src, size_t size) - { - ASSERT((size & 15) == 0); - - size >>= 4; - - GSVector4i* s = (GSVector4i*)src; - GSVector4i* d = (GSVector4i*)dst; - - GSVector4i v = GSVector4i::xffffffff(); - - for(size_t i = 0; i < size; i++) - { - v &= d[i] == s[i]; - - d[i] = s[i]; - } - - return v.alltrue(); - } - - __forceinline void operator += (const GSVector4i& v) - { - m = _mm_add_epi32(m, v); - } - - __forceinline void operator -= (const GSVector4i& v) - { - m = _mm_sub_epi32(m, v); - } - - __forceinline void operator += (int i) - { - *this += GSVector4i(i); - } - - __forceinline void operator -= (int i) - { - *this -= GSVector4i(i); - } - - __forceinline void operator <<= (const int i) - { - m = _mm_slli_epi32(m, i); - } - - __forceinline void operator >>= (const int i) - { - m = _mm_srli_epi32(m, i); - } - - __forceinline void operator &= (const GSVector4i& v) - { - m = _mm_and_si128(m, v); - } - - __forceinline void operator |= (const GSVector4i& v) - { - m = _mm_or_si128(m, v); - } - - __forceinline void operator ^= (const GSVector4i& v) - { - m = _mm_xor_si128(m, v); - } - - __forceinline friend GSVector4i operator + (const GSVector4i& v1, const GSVector4i& v2) - { - return 
GSVector4i(_mm_add_epi32(v1, v2)); - } - - __forceinline friend GSVector4i operator - (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_sub_epi32(v1, v2)); - } - - __forceinline friend GSVector4i operator + (const GSVector4i& v, int i) - { - return v + GSVector4i(i); - } - - __forceinline friend GSVector4i operator - (const GSVector4i& v, int i) - { - return v - GSVector4i(i); - } - - __forceinline friend GSVector4i operator << (const GSVector4i& v, const int i) - { - return GSVector4i(_mm_slli_epi32(v, i)); - } - - __forceinline friend GSVector4i operator >> (const GSVector4i& v, const int i) - { - return GSVector4i(_mm_srli_epi32(v, i)); - } - - __forceinline friend GSVector4i operator & (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_and_si128(v1, v2)); - } - - __forceinline friend GSVector4i operator | (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_or_si128(v1, v2)); - } - - __forceinline friend GSVector4i operator ^ (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_xor_si128(v1, v2)); - } - - __forceinline friend GSVector4i operator & (const GSVector4i& v, int i) - { - return v & GSVector4i(i); - } - - __forceinline friend GSVector4i operator | (const GSVector4i& v, int i) - { - return v | GSVector4i(i); - } - - __forceinline friend GSVector4i operator ^ (const GSVector4i& v, int i) - { - return v ^ GSVector4i(i); - } - - __forceinline friend GSVector4i operator ~ (const GSVector4i& v) - { - return v ^ (v == v); - } - - __forceinline friend GSVector4i operator == (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_cmpeq_epi32(v1, v2)); - } - - __forceinline friend GSVector4i operator != (const GSVector4i& v1, const GSVector4i& v2) - { - return ~(v1 == v2); - } - - __forceinline friend GSVector4i operator > (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_cmpgt_epi32(v1, v2)); - } - - __forceinline friend GSVector4i 
operator < (const GSVector4i& v1, const GSVector4i& v2) - { - return GSVector4i(_mm_cmplt_epi32(v1, v2)); - } - - __forceinline friend GSVector4i operator >= (const GSVector4i& v1, const GSVector4i& v2) - { - return (v1 > v2) | (v1 == v2); - } - - __forceinline friend GSVector4i operator <= (const GSVector4i& v1, const GSVector4i& v2) - { - return (v1 < v2) | (v1 == v2); - } - - #define VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector4i xs##ys##zs##ws() const {return GSVector4i(_mm_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector4i xs##ys##zs##ws##l() const {return GSVector4i(_mm_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector4i xs##ys##zs##ws##h() const {return GSVector4i(_mm_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector4i xs##ys##zs##ws##lh() const {return GSVector4i(_mm_shufflehi_epi16(_mm_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn)));} \ - - #define VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ - VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ - VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \ - VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \ - VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \ - - #define VECTOR4i_SHUFFLE_2(xs, xn, ys, yn) \ - VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, x, 0) \ - VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, y, 1) \ - VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, z, 2) \ - VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, w, 3) \ - - #define VECTOR4i_SHUFFLE_1(xs, xn) \ - VECTOR4i_SHUFFLE_2(xs, xn, x, 0) \ - VECTOR4i_SHUFFLE_2(xs, xn, y, 1) \ - VECTOR4i_SHUFFLE_2(xs, xn, z, 2) \ - VECTOR4i_SHUFFLE_2(xs, xn, w, 3) \ - - VECTOR4i_SHUFFLE_1(x, 0) - VECTOR4i_SHUFFLE_1(y, 1) - VECTOR4i_SHUFFLE_1(z, 2) - VECTOR4i_SHUFFLE_1(w, 3) - - __forceinline static GSVector4i zero() {return GSVector4i(_mm_setzero_si128());} - - __forceinline static GSVector4i xffffffff() {return zero() == zero();} - - __forceinline static GSVector4i 
x00000001() {return xffffffff().srl32(31);} - __forceinline static GSVector4i x00000003() {return xffffffff().srl32(30);} - __forceinline static GSVector4i x00000007() {return xffffffff().srl32(29);} - __forceinline static GSVector4i x0000000f() {return xffffffff().srl32(28);} - __forceinline static GSVector4i x0000001f() {return xffffffff().srl32(27);} - __forceinline static GSVector4i x0000003f() {return xffffffff().srl32(26);} - __forceinline static GSVector4i x0000007f() {return xffffffff().srl32(25);} - __forceinline static GSVector4i x000000ff() {return xffffffff().srl32(24);} - __forceinline static GSVector4i x000001ff() {return xffffffff().srl32(23);} - __forceinline static GSVector4i x000003ff() {return xffffffff().srl32(22);} - __forceinline static GSVector4i x000007ff() {return xffffffff().srl32(21);} - __forceinline static GSVector4i x00000fff() {return xffffffff().srl32(20);} - __forceinline static GSVector4i x00001fff() {return xffffffff().srl32(19);} - __forceinline static GSVector4i x00003fff() {return xffffffff().srl32(18);} - __forceinline static GSVector4i x00007fff() {return xffffffff().srl32(17);} - __forceinline static GSVector4i x0000ffff() {return xffffffff().srl32(16);} - __forceinline static GSVector4i x0001ffff() {return xffffffff().srl32(15);} - __forceinline static GSVector4i x0003ffff() {return xffffffff().srl32(14);} - __forceinline static GSVector4i x0007ffff() {return xffffffff().srl32(13);} - __forceinline static GSVector4i x000fffff() {return xffffffff().srl32(12);} - __forceinline static GSVector4i x001fffff() {return xffffffff().srl32(11);} - __forceinline static GSVector4i x003fffff() {return xffffffff().srl32(10);} - __forceinline static GSVector4i x007fffff() {return xffffffff().srl32( 9);} - __forceinline static GSVector4i x00ffffff() {return xffffffff().srl32( 8);} - __forceinline static GSVector4i x01ffffff() {return xffffffff().srl32( 7);} - __forceinline static GSVector4i x03ffffff() {return xffffffff().srl32( 6);} - 
/* Constant-mask helpers: each returns xffffffff() passed through one shift with a fixed count, and the helper's name spells the hex value intended for every lane. Here srl32(n) corresponds to names with only the low (32 - n) bits of each 32-bit lane set. NOTE(review): srl32 / sll32 / srl16 / sll16 are defined outside this view; presumably per-lane logical shifts, which is what the names imply - confirm. */ __forceinline static GSVector4i x07ffffff() {return xffffffff().srl32( 5);} - __forceinline static GSVector4i x0fffffff() {return xffffffff().srl32( 4);} - __forceinline static GSVector4i x1fffffff() {return xffffffff().srl32( 3);} - __forceinline static GSVector4i x3fffffff() {return xffffffff().srl32( 2);} - __forceinline static GSVector4i x7fffffff() {return xffffffff().srl32( 1);} - - /* sll32(n) group: names with only the high (32 - n) bits of each 32-bit lane set. */ __forceinline static GSVector4i x80000000() {return xffffffff().sll32(31);} - __forceinline static GSVector4i xc0000000() {return xffffffff().sll32(30);} - __forceinline static GSVector4i xe0000000() {return xffffffff().sll32(29);} - __forceinline static GSVector4i xf0000000() {return xffffffff().sll32(28);} - __forceinline static GSVector4i xf8000000() {return xffffffff().sll32(27);} - __forceinline static GSVector4i xfc000000() {return xffffffff().sll32(26);} - __forceinline static GSVector4i xfe000000() {return xffffffff().sll32(25);} - __forceinline static GSVector4i xff000000() {return xffffffff().sll32(24);} - __forceinline static GSVector4i xff800000() {return xffffffff().sll32(23);} - __forceinline static GSVector4i xffc00000() {return xffffffff().sll32(22);} - __forceinline static GSVector4i xffe00000() {return xffffffff().sll32(21);} - __forceinline static GSVector4i xfff00000() {return xffffffff().sll32(20);} - __forceinline static GSVector4i xfff80000() {return xffffffff().sll32(19);} - __forceinline static GSVector4i xfffc0000() {return xffffffff().sll32(18);} - __forceinline static GSVector4i xfffe0000() {return xffffffff().sll32(17);} - __forceinline static GSVector4i xffff0000() {return xffffffff().sll32(16);} - __forceinline static GSVector4i xffff8000() {return xffffffff().sll32(15);} - __forceinline static GSVector4i xffffc000() {return xffffffff().sll32(14);} - __forceinline static GSVector4i xffffe000() {return xffffffff().sll32(13);} - __forceinline static GSVector4i xfffff000() {return xffffffff().sll32(12);} - __forceinline static GSVector4i xfffff800() 
{return xffffffff().sll32(11);} - __forceinline static GSVector4i xfffffc00() {return xffffffff().sll32(10);} - __forceinline static GSVector4i xfffffe00() {return xffffffff().sll32( 9);} - __forceinline static GSVector4i xffffff00() {return xffffffff().sll32( 8);} - __forceinline static GSVector4i xffffff80() {return xffffffff().sll32( 7);} - __forceinline static GSVector4i xffffffc0() {return xffffffff().sll32( 6);} - __forceinline static GSVector4i xffffffe0() {return xffffffff().sll32( 5);} - __forceinline static GSVector4i xfffffff0() {return xffffffff().sll32( 4);} - __forceinline static GSVector4i xfffffff8() {return xffffffff().sll32( 3);} - __forceinline static GSVector4i xfffffffc() {return xffffffff().sll32( 2);} - __forceinline static GSVector4i xfffffffe() {return xffffffff().sll32( 1);} - - /* srl16(n) group: four-digit names, i.e. the low (16 - n) bits of each 16-bit lane. */ __forceinline static GSVector4i x0001() {return xffffffff().srl16(15);} - __forceinline static GSVector4i x0003() {return xffffffff().srl16(14);} - __forceinline static GSVector4i x0007() {return xffffffff().srl16(13);} - __forceinline static GSVector4i x000f() {return xffffffff().srl16(12);} - __forceinline static GSVector4i x001f() {return xffffffff().srl16(11);} - __forceinline static GSVector4i x003f() {return xffffffff().srl16(10);} - __forceinline static GSVector4i x007f() {return xffffffff().srl16( 9);} - __forceinline static GSVector4i x00ff() {return xffffffff().srl16( 8);} - __forceinline static GSVector4i x01ff() {return xffffffff().srl16( 7);} - __forceinline static GSVector4i x03ff() {return xffffffff().srl16( 6);} - __forceinline static GSVector4i x07ff() {return xffffffff().srl16( 5);} - __forceinline static GSVector4i x0fff() {return xffffffff().srl16( 4);} - __forceinline static GSVector4i x1fff() {return xffffffff().srl16( 3);} - __forceinline static GSVector4i x3fff() {return xffffffff().srl16( 2);} - __forceinline static GSVector4i x7fff() {return xffffffff().srl16( 1);} - - /* sll16(n) group: the high (16 - n) bits of each 16-bit lane. */ __forceinline static GSVector4i x8000() {return xffffffff().sll16(15);} 
- /* Constant-mask helpers: each returns xffffffff() passed through sll16 with a fixed count; the name spells the hex value intended for every 16-bit lane (high bits set). NOTE(review): sll16 / srl32 are defined outside this view; presumably per-lane logical shifts, which is what the names imply - confirm. */ __forceinline static GSVector4i xc000() {return xffffffff().sll16(14);} - __forceinline static GSVector4i xe000() {return xffffffff().sll16(13);} - __forceinline static GSVector4i xf000() {return xffffffff().sll16(12);} - __forceinline static GSVector4i xf800() {return xffffffff().sll16(11);} - __forceinline static GSVector4i xfc00() {return xffffffff().sll16(10);} - __forceinline static GSVector4i xfe00() {return xffffffff().sll16( 9);} - __forceinline static GSVector4i xff00() {return xffffffff().sll16( 8);} - __forceinline static GSVector4i xff80() {return xffffffff().sll16( 7);} - __forceinline static GSVector4i xffc0() {return xffffffff().sll16( 6);} - __forceinline static GSVector4i xffe0() {return xffffffff().sll16( 5);} - __forceinline static GSVector4i xfff0() {return xffffffff().sll16( 4);} - __forceinline static GSVector4i xfff8() {return xffffffff().sll16( 3);} - __forceinline static GSVector4i xfffc() {return xffffffff().sll16( 2);} - __forceinline static GSVector4i xfffe() {return xffffffff().sll16( 1);} - - /* Overload taking a vector: builds the all-ones value by comparing v with itself (operator == is a 32-bit lane equality compare), so the result does not depend on v's contents. */ __forceinline static GSVector4i xffffffff(const GSVector4i& v) {return v == v;} - - /* Vector-argument versions of the srl32 masks: same shift counts as the no-argument helpers of the same name, but starting from xffffffff(v). */ __forceinline static GSVector4i x00000001(const GSVector4i& v) {return xffffffff(v).srl32(31);} - __forceinline static GSVector4i x00000003(const GSVector4i& v) {return xffffffff(v).srl32(30);} - __forceinline static GSVector4i x00000007(const GSVector4i& v) {return xffffffff(v).srl32(29);} - __forceinline static GSVector4i x0000000f(const GSVector4i& v) {return xffffffff(v).srl32(28);} - __forceinline static GSVector4i x0000001f(const GSVector4i& v) {return xffffffff(v).srl32(27);} - __forceinline static GSVector4i x0000003f(const GSVector4i& v) {return xffffffff(v).srl32(26);} - __forceinline static GSVector4i x0000007f(const GSVector4i& v) {return xffffffff(v).srl32(25);} - __forceinline static GSVector4i x000000ff(const GSVector4i& v) {return xffffffff(v).srl32(24);} - __forceinline static GSVector4i x000001ff(const GSVector4i& v) {return 
xffffffff(v).srl32(23);} - __forceinline static GSVector4i x000003ff(const GSVector4i& v) {return xffffffff(v).srl32(22);} - __forceinline static GSVector4i x000007ff(const GSVector4i& v) {return xffffffff(v).srl32(21);} - __forceinline static GSVector4i x00000fff(const GSVector4i& v) {return xffffffff(v).srl32(20);} - __forceinline static GSVector4i x00001fff(const GSVector4i& v) {return xffffffff(v).srl32(19);} - __forceinline static GSVector4i x00003fff(const GSVector4i& v) {return xffffffff(v).srl32(18);} - __forceinline static GSVector4i x00007fff(const GSVector4i& v) {return xffffffff(v).srl32(17);} - __forceinline static GSVector4i x0000ffff(const GSVector4i& v) {return xffffffff(v).srl32(16);} - __forceinline static GSVector4i x0001ffff(const GSVector4i& v) {return xffffffff(v).srl32(15);} - __forceinline static GSVector4i x0003ffff(const GSVector4i& v) {return xffffffff(v).srl32(14);} - __forceinline static GSVector4i x0007ffff(const GSVector4i& v) {return xffffffff(v).srl32(13);} - __forceinline static GSVector4i x000fffff(const GSVector4i& v) {return xffffffff(v).srl32(12);} - __forceinline static GSVector4i x001fffff(const GSVector4i& v) {return xffffffff(v).srl32(11);} - __forceinline static GSVector4i x003fffff(const GSVector4i& v) {return xffffffff(v).srl32(10);} - __forceinline static GSVector4i x007fffff(const GSVector4i& v) {return xffffffff(v).srl32( 9);} - __forceinline static GSVector4i x00ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 8);} - __forceinline static GSVector4i x01ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 7);} - __forceinline static GSVector4i x03ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 6);} - __forceinline static GSVector4i x07ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 5);} - __forceinline static GSVector4i x0fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 4);} - __forceinline static GSVector4i x1fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 3);} - 
/* Vector-argument constant-mask helpers: each returns xffffffff(v), i.e. v == v, passed through one shift with a fixed count; the name spells the hex value intended for every lane. NOTE(review): srl32 / sll32 / srl16 are defined outside this view; presumably per-lane logical shifts, which is what the names imply - confirm. */ __forceinline static GSVector4i x3fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 2);} - __forceinline static GSVector4i x7fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 1);} - - /* sll32(n) group: names with only the high (32 - n) bits of each 32-bit lane set. */ __forceinline static GSVector4i x80000000(const GSVector4i& v) {return xffffffff(v).sll32(31);} - __forceinline static GSVector4i xc0000000(const GSVector4i& v) {return xffffffff(v).sll32(30);} - __forceinline static GSVector4i xe0000000(const GSVector4i& v) {return xffffffff(v).sll32(29);} - __forceinline static GSVector4i xf0000000(const GSVector4i& v) {return xffffffff(v).sll32(28);} - __forceinline static GSVector4i xf8000000(const GSVector4i& v) {return xffffffff(v).sll32(27);} - __forceinline static GSVector4i xfc000000(const GSVector4i& v) {return xffffffff(v).sll32(26);} - __forceinline static GSVector4i xfe000000(const GSVector4i& v) {return xffffffff(v).sll32(25);} - __forceinline static GSVector4i xff000000(const GSVector4i& v) {return xffffffff(v).sll32(24);} - __forceinline static GSVector4i xff800000(const GSVector4i& v) {return xffffffff(v).sll32(23);} - __forceinline static GSVector4i xffc00000(const GSVector4i& v) {return xffffffff(v).sll32(22);} - __forceinline static GSVector4i xffe00000(const GSVector4i& v) {return xffffffff(v).sll32(21);} - __forceinline static GSVector4i xfff00000(const GSVector4i& v) {return xffffffff(v).sll32(20);} - __forceinline static GSVector4i xfff80000(const GSVector4i& v) {return xffffffff(v).sll32(19);} - __forceinline static GSVector4i xfffc0000(const GSVector4i& v) {return xffffffff(v).sll32(18);} - __forceinline static GSVector4i xfffe0000(const GSVector4i& v) {return xffffffff(v).sll32(17);} - __forceinline static GSVector4i xffff0000(const GSVector4i& v) {return xffffffff(v).sll32(16);} - __forceinline static GSVector4i xffff8000(const GSVector4i& v) {return xffffffff(v).sll32(15);} - __forceinline static GSVector4i xffffc000(const GSVector4i& v) {return xffffffff(v).sll32(14);} - __forceinline static GSVector4i 
xffffe000(const GSVector4i& v) {return xffffffff(v).sll32(13);} - __forceinline static GSVector4i xfffff000(const GSVector4i& v) {return xffffffff(v).sll32(12);} - __forceinline static GSVector4i xfffff800(const GSVector4i& v) {return xffffffff(v).sll32(11);} - __forceinline static GSVector4i xfffffc00(const GSVector4i& v) {return xffffffff(v).sll32(10);} - __forceinline static GSVector4i xfffffe00(const GSVector4i& v) {return xffffffff(v).sll32( 9);} - __forceinline static GSVector4i xffffff00(const GSVector4i& v) {return xffffffff(v).sll32( 8);} - __forceinline static GSVector4i xffffff80(const GSVector4i& v) {return xffffffff(v).sll32( 7);} - __forceinline static GSVector4i xffffffc0(const GSVector4i& v) {return xffffffff(v).sll32( 6);} - __forceinline static GSVector4i xffffffe0(const GSVector4i& v) {return xffffffff(v).sll32( 5);} - __forceinline static GSVector4i xfffffff0(const GSVector4i& v) {return xffffffff(v).sll32( 4);} - __forceinline static GSVector4i xfffffff8(const GSVector4i& v) {return xffffffff(v).sll32( 3);} - __forceinline static GSVector4i xfffffffc(const GSVector4i& v) {return xffffffff(v).sll32( 2);} - __forceinline static GSVector4i xfffffffe(const GSVector4i& v) {return xffffffff(v).sll32( 1);} - - /* srl16(n) group: four-digit names, i.e. the low (16 - n) bits of each 16-bit lane. */ __forceinline static GSVector4i x0001(const GSVector4i& v) {return xffffffff(v).srl16(15);} - __forceinline static GSVector4i x0003(const GSVector4i& v) {return xffffffff(v).srl16(14);} - __forceinline static GSVector4i x0007(const GSVector4i& v) {return xffffffff(v).srl16(13);} - __forceinline static GSVector4i x000f(const GSVector4i& v) {return xffffffff(v).srl16(12);} - __forceinline static GSVector4i x001f(const GSVector4i& v) {return xffffffff(v).srl16(11);} - __forceinline static GSVector4i x003f(const GSVector4i& v) {return xffffffff(v).srl16(10);} - __forceinline static GSVector4i x007f(const GSVector4i& v) {return xffffffff(v).srl16( 9);} - __forceinline static GSVector4i x00ff(const GSVector4i& v) {return xffffffff(v).srl16( 8);} - 
__forceinline static GSVector4i x01ff(const GSVector4i& v) {return xffffffff(v).srl16( 7);} - __forceinline static GSVector4i x03ff(const GSVector4i& v) {return xffffffff(v).srl16( 6);} - __forceinline static GSVector4i x07ff(const GSVector4i& v) {return xffffffff(v).srl16( 5);} - __forceinline static GSVector4i x0fff(const GSVector4i& v) {return xffffffff(v).srl16( 4);} - __forceinline static GSVector4i x1fff(const GSVector4i& v) {return xffffffff(v).srl16( 3);} - __forceinline static GSVector4i x3fff(const GSVector4i& v) {return xffffffff(v).srl16( 2);} - __forceinline static GSVector4i x7fff(const GSVector4i& v) {return xffffffff(v).srl16( 1);} - - __forceinline static GSVector4i x8000(const GSVector4i& v) {return xffffffff(v).sll16(15);} - __forceinline static GSVector4i xc000(const GSVector4i& v) {return xffffffff(v).sll16(14);} - __forceinline static GSVector4i xe000(const GSVector4i& v) {return xffffffff(v).sll16(13);} - __forceinline static GSVector4i xf000(const GSVector4i& v) {return xffffffff(v).sll16(12);} - __forceinline static GSVector4i xf800(const GSVector4i& v) {return xffffffff(v).sll16(11);} - __forceinline static GSVector4i xfc00(const GSVector4i& v) {return xffffffff(v).sll16(10);} - __forceinline static GSVector4i xfe00(const GSVector4i& v) {return xffffffff(v).sll16( 9);} - __forceinline static GSVector4i xff00(const GSVector4i& v) {return xffffffff(v).sll16( 8);} - __forceinline static GSVector4i xff80(const GSVector4i& v) {return xffffffff(v).sll16( 7);} - __forceinline static GSVector4i xffc0(const GSVector4i& v) {return xffffffff(v).sll16( 6);} - __forceinline static GSVector4i xffe0(const GSVector4i& v) {return xffffffff(v).sll16( 5);} - __forceinline static GSVector4i xfff0(const GSVector4i& v) {return xffffffff(v).sll16( 4);} - __forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);} - __forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);} - __forceinline static 
GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);} - - __forceinline static GSVector4i xff(int n) {return m_xff[n];} - __forceinline static GSVector4i x0f(int n) {return m_x0f[n];} -}; - -__aligned(class, 16) GSVector4 -{ -public: - union - { - struct {float x, y, z, w;}; - struct {float r, g, b, a;}; - struct {float left, top, right, bottom;}; - float v[4]; - float f32[4]; - int8 i8[16]; - int16 i16[8]; - int32 i32[4]; - int64 i64[2]; - uint8 u8[16]; - uint16 u16[8]; - uint32 u32[4]; - uint64 u64[2]; - __m128 m; - }; - - static const GSVector4 m_ps0123; - static const GSVector4 m_ps4567; - static const GSVector4 m_half; - static const GSVector4 m_one; - static const GSVector4 m_two; - static const GSVector4 m_four; - static const GSVector4 m_x4b000000; - static const GSVector4 m_x4f800000; - static const GSVector4 m_max; - static const GSVector4 m_min; - - __forceinline GSVector4() - { - } - - __forceinline GSVector4(float x, float y, float z, float w) - { - m = _mm_set_ps(w, z, y, x); - } - - __forceinline GSVector4(float x, float y) - { - m = _mm_unpacklo_ps(_mm_load_ss(&x), _mm_load_ss(&y)); - } - - __forceinline GSVector4(int x, int y, int z, int w) - { - GSVector4i v(x, y, z, w); - - m = _mm_cvtepi32_ps(v.m); - } - - __forceinline GSVector4(int x, int y) - { - m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y))); - } - - //Not currently used, just causes a compiler warning - /*__forceinline GSVector4(const GSVector4& v) - { - m = v.m; - }*/ - - __forceinline explicit GSVector4(const GSVector2& v) - { - m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v)); - } - - __forceinline explicit GSVector4(const GSVector2i& v) - { - m = _mm_cvtepi32_ps(_mm_loadl_epi64((__m128i*)&v)); - } - - __forceinline explicit GSVector4(__m128 m) - { - this->m = m; - } - - __forceinline explicit GSVector4(float f) - { - *this = f; - } - - __forceinline explicit GSVector4(int i) - { - #if _M_SSE >= 0x501 - - m = 
_mm_cvtepi32_ps(_mm_broadcastd_epi32(_mm_cvtsi32_si128(i))); - - #else - - GSVector4i v((int)i); - - *this = GSVector4(v); - - #endif - } - - __forceinline explicit GSVector4(uint32 u) - { - GSVector4i v((int)u); - - *this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); - } - - __forceinline explicit GSVector4(const GSVector4i& v); - - __forceinline static GSVector4 cast(const GSVector4i& v); - - #if _M_SSE >= 0x500 - - __forceinline static GSVector4 cast(const GSVector8& v); - - #endif - - #if _M_SSE >= 0x501 - - __forceinline static GSVector4 cast(const GSVector8i& v); - - #endif - - __forceinline void operator = (const GSVector4& v) - { - m = v.m; - } - - __forceinline void operator = (float f) - { - #if _M_SSE >= 0x501 - - m = _mm_broadcastss_ps(_mm_load_ss(&f)); - - #else - - m = _mm_set1_ps(f); - - #endif - } - - __forceinline void operator = (__m128 m) - { - this->m = m; - } - - __forceinline operator __m128() const - { - return m; - } - - __forceinline uint32 rgba32() const - { - return GSVector4i(*this).rgba32(); - } - - __forceinline static GSVector4 rgba32(uint32 rgba) - { - return GSVector4(GSVector4i::load((int)rgba).u8to32()); - } - - __forceinline static GSVector4 rgba32(uint32 rgba, int shift) - { - return GSVector4(GSVector4i::load((int)rgba).u8to32() << shift); - } - - __forceinline GSVector4 abs() const - { - return *this & cast(GSVector4i::x7fffffff()); - } - - __forceinline GSVector4 neg() const - { - return *this ^ cast(GSVector4i::x80000000()); - } - - __forceinline GSVector4 rcp() const - { - return GSVector4(_mm_rcp_ps(m)); - } - - __forceinline GSVector4 rcpnr() const - { - GSVector4 v = rcp(); - - return (v + v) - (v * v) * *this; - } - - template __forceinline GSVector4 round() const - { - #if _M_SSE >= 0x401 - - return GSVector4(_mm_round_ps(m, mode)); - - #else - - GSVector4 a = *this; - - GSVector4 b = (a & cast(GSVector4i::x80000000())) | m_x4b000000; - - b = a + b - b; - - if((mode & 7) == (Round_NegInf & 7)) - { - 
return b - ((a < b) & m_one); - } - - if((mode & 7) == (Round_PosInf & 7)) - { - return b + ((a > b) & m_one); - } - - ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented - - return b; - - #endif - } - - __forceinline GSVector4 floor() const - { - return round(); - } - - __forceinline GSVector4 ceil() const - { - return round(); - } - - // http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html - - #define LOG_POLY0(x, c0) GSVector4(c0) - #define LOG_POLY1(x, c0, c1) (LOG_POLY0(x, c1).madd(x, GSVector4(c0))) - #define LOG_POLY2(x, c0, c1, c2) (LOG_POLY1(x, c1, c2).madd(x, GSVector4(c0))) - #define LOG_POLY3(x, c0, c1, c2, c3) (LOG_POLY2(x, c1, c2, c3).madd(x, GSVector4(c0))) - #define LOG_POLY4(x, c0, c1, c2, c3, c4) (LOG_POLY3(x, c1, c2, c3, c4).madd(x, GSVector4(c0))) - #define LOG_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector4(c0))) - - __forceinline GSVector4 log2(int precision = 5) const - { - // NOTE: sign bit ignored, safe to pass negative numbers - - // The idea behind this algorithm is to split the float into two parts, log2(m * 2^e) => log2(m) + log2(2^e) => log2(m) + e, - // and then approximate the logarithm of the mantissa (it's 1.x when normalized, a nice short range). 
- - GSVector4 one = m_one; - - GSVector4i i = GSVector4i::cast(*this); - - GSVector4 e = GSVector4(((i << 1) >> 24) - GSVector4i::x0000007f()); - GSVector4 m = GSVector4::cast((i << 9) >> 9) | one; - - GSVector4 p; - - // Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ - - switch(precision) - { - case 3: - p = LOG_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); - break; - case 4: - p = LOG_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); - break; - default: - case 5: - p = LOG_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); - break; - case 6: - p = LOG_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); - break; - } - - // This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 - - p = p * (m - one); - - return p + e; - } - - __forceinline GSVector4 madd(const GSVector4& a, const GSVector4& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector4(_mm_fmadd_ps(m, a, b)); - - #else - - return *this * a + b; - - #endif - } - - __forceinline GSVector4 msub(const GSVector4& a, const GSVector4& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector4(_mm_fmsub_ps(m, a, b)); - - #else - - return *this * a - b; - - #endif - } - - __forceinline GSVector4 nmadd(const GSVector4& a, const GSVector4& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector4(_mm_fnmadd_ps(m, a, b)); - - #else - - return b - *this * a; - - #endif - } - - __forceinline GSVector4 nmsub(const GSVector4& a, const GSVector4& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector4(_mm_fnmsub_ps(m, a, b)); - - #else - - return -b - *this * a; - - #endif - } - - __forceinline GSVector4 addm(const GSVector4& a, const GSVector4& b) const - { - return a.madd(b, *this); // *this + a * b - } - - __forceinline GSVector4 
subm(const GSVector4& a, const GSVector4& b) const - { - return a.nmadd(b, *this); // *this - a * b - } - - __forceinline GSVector4 hadd() const - { - #if _M_SSE >= 0x300 - - return GSVector4(_mm_hadd_ps(m, m)); - - #else - - return xzxz() + ywyw(); - - #endif - } - - __forceinline GSVector4 hadd(const GSVector4& v) const - { - #if _M_SSE >= 0x300 - - return GSVector4(_mm_hadd_ps(m, v.m)); - - #else - - return xzxz(v) + ywyw(v); - - #endif - } - - __forceinline GSVector4 hsub() const - { - #if _M_SSE >= 0x300 - - return GSVector4(_mm_hsub_ps(m, m)); - - #else - - return xzxz() - ywyw(); - - #endif - } - - __forceinline GSVector4 hsub(const GSVector4& v) const - { - #if _M_SSE >= 0x300 - - return GSVector4(_mm_hsub_ps(m, v.m)); - - #else - - return xzxz(v) - ywyw(v); - - #endif - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4 dp(const GSVector4& v) const - { - return GSVector4(_mm_dp_ps(m, v.m, i)); - } - - #endif - - __forceinline GSVector4 sat(const GSVector4& a, const GSVector4& b) const - { - return GSVector4(_mm_min_ps(_mm_max_ps(m, a), b)); - } - - __forceinline GSVector4 sat(const GSVector4& a) const - { - return GSVector4(_mm_min_ps(_mm_max_ps(m, a.xyxy()), a.zwzw())); - } - - __forceinline GSVector4 sat(const float scale = 255) const - { - return sat(zero(), GSVector4(scale)); - } - - __forceinline GSVector4 clamp(const float scale = 255) const - { - return min(GSVector4(scale)); - } - - __forceinline GSVector4 min(const GSVector4& a) const - { - return GSVector4(_mm_min_ps(m, a)); - } - - __forceinline GSVector4 max(const GSVector4& a) const - { - return GSVector4(_mm_max_ps(m, a)); - } - - #if _M_SSE >= 0x401 - - template __forceinline GSVector4 blend32(const GSVector4& a) const - { - return GSVector4(_mm_blend_ps(m, a, mask)); - } - - #endif - - __forceinline GSVector4 blend32(const GSVector4& a, const GSVector4& mask) const - { - #if _M_SSE >= 0x401 - - return GSVector4(_mm_blendv_ps(m, a, mask)); - - #else - - return 
GSVector4(_mm_or_ps(_mm_andnot_ps(mask, m), _mm_and_ps(mask, a))); - - #endif - } - - __forceinline GSVector4 upl(const GSVector4& a) const - { - return GSVector4(_mm_unpacklo_ps(m, a)); - } - - __forceinline GSVector4 uph(const GSVector4& a) const - { - return GSVector4(_mm_unpackhi_ps(m, a)); - } - - __forceinline GSVector4 l2h(const GSVector4& a) const - { - return GSVector4(_mm_movelh_ps(m, a)); - } - - __forceinline GSVector4 h2l(const GSVector4& a) const - { - return GSVector4(_mm_movehl_ps(m, a)); - } - - __forceinline GSVector4 andnot(const GSVector4& v) const - { - return GSVector4(_mm_andnot_ps(v.m, m)); - } - - __forceinline int mask() const - { - return _mm_movemask_ps(m); - } - - __forceinline bool alltrue() const - { - return mask() == 0xf; - } - - __forceinline bool allfalse() const - { - #if _M_SSE >= 0x500 - - return _mm_testz_ps(m, m) != 0; - - #elif _M_SSE >= 0x401 - - __m128i a = _mm_castps_si128(m); - - return _mm_testz_si128(a, a) != 0; - - #else - - return mask() == 0; - - #endif - } - - __forceinline GSVector4 replace_nan(const GSVector4& v) const - { - return v.blend32(*this, *this == *this); - } - - template __forceinline GSVector4 insert32(const GSVector4& v) const - { - // TODO: use blendps when src == dst - - #if 0 // _M_SSE >= 0x401 - - // NOTE: it's faster with shuffles... 
- - return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0))); - - #else - - switch(dst) - { - case 0: - switch(src) - { - case 0: return yyxx(v).zxzw(*this); - case 1: return yyyy(v).zxzw(*this); - case 2: return yyzz(v).zxzw(*this); - case 3: return yyww(v).zxzw(*this); - default: __assume(0); - } - break; - case 1: - switch(src) - { - case 0: return xxxx(v).xzzw(*this); - case 1: return xxyy(v).xzzw(*this); - case 2: return xxzz(v).xzzw(*this); - case 3: return xxww(v).xzzw(*this); - default: __assume(0); - } - break; - case 2: - switch(src) - { - case 0: return xyzx(wwxx(v)); - case 1: return xyzx(wwyy(v)); - case 2: return xyzx(wwzz(v)); - case 3: return xyzx(wwww(v)); - default: __assume(0); - } - break; - case 3: - switch(src) - { - case 0: return xyxz(zzxx(v)); - case 1: return xyxz(zzyy(v)); - case 2: return xyxz(zzzz(v)); - case 3: return xyxz(zzww(v)); - default: __assume(0); - } - break; - default: - __assume(0); - } - - #endif - - } - -#ifdef __linux__ -#if 0 - // Debug build error, _mm_extract_ps is actually a macro that use an anonymous union - // that contains i. 
I decide to rename the template on linux but it makes windows unhappy - // Hence the nice ifdef - // - // Code extract: - // union { int i; float f; } __tmp; - -GSVector.h:2977:40: error: declaration of 'int GSVector4::extract32() const::::i' - return _mm_extract_ps(m, i); -GSVector.h:2973:15: error: shadows template parm 'int i' - template __forceinline int extract32() const -#endif - - template __forceinline int extract32() const - { - #if _M_SSE >= 0x401 - - return _mm_extract_ps(m, index); - - #else - - return i32[index]; - - #endif - } -#else - template __forceinline int extract32() const - { - #if _M_SSE >= 0x401 - - return _mm_extract_ps(m, i); - - #else - - return i32[i]; - - #endif - } -#endif - - __forceinline static GSVector4 zero() - { - return GSVector4(_mm_setzero_ps()); - } - - __forceinline static GSVector4 xffffffff() - { - return zero() == zero(); - } - - __forceinline static GSVector4 ps0123() - { - return GSVector4(m_ps0123); - } - - __forceinline static GSVector4 ps4567() - { - return GSVector4(m_ps4567); - } - - __forceinline static GSVector4 loadl(const void* p) - { - return GSVector4(_mm_castpd_ps(_mm_load_sd((double*)p))); - } - - __forceinline static GSVector4 load(float f) - { - return GSVector4(_mm_load_ss(&f)); - } - - __forceinline static GSVector4 load(uint32 u) - { - GSVector4i v = GSVector4i::load((int)u); - - return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); - } - - template __forceinline static GSVector4 load(const void* p) - { - return GSVector4(aligned ? 
_mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p)); - } - - __forceinline static void storent(void* p, const GSVector4& v) - { - _mm_stream_ps((float*)p, v.m); - } - - __forceinline static void storel(void* p, const GSVector4& v) - { - _mm_store_sd((double*)p, _mm_castps_pd(v.m)); - } - - template __forceinline static void store(void* p, const GSVector4& v) - { - if(aligned) _mm_store_ps((float*)p, v.m); - else _mm_storeu_ps((float*)p, v.m); - } - - __forceinline static void expand(const GSVector4i& v, GSVector4& a, GSVector4& b, GSVector4& c, GSVector4& d) - { - GSVector4i mask = GSVector4i::x000000ff(); - - a = GSVector4(v & mask); - b = GSVector4((v >> 8) & mask); - c = GSVector4((v >> 16) & mask); - d = GSVector4((v >> 24)); - } - - __forceinline static void transpose(GSVector4& a, GSVector4& b, GSVector4& c, GSVector4& d) - { - GSVector4 v0 = a.xyxy(b); - GSVector4 v1 = c.xyxy(d); - - GSVector4 e = v0.xzxz(v1); - GSVector4 f = v0.ywyw(v1); - - GSVector4 v2 = a.zwzw(b); - GSVector4 v3 = c.zwzw(d); - - GSVector4 g = v2.xzxz(v3); - GSVector4 h = v2.ywyw(v3); - - a = e; - b = f; - c = g; - d = h; -/* - GSVector4 v0 = a.xyxy(b); - GSVector4 v1 = c.xyxy(d); - GSVector4 v2 = a.zwzw(b); - GSVector4 v3 = c.zwzw(d); - - a = v0.xzxz(v1); - b = v0.ywyw(v1); - c = v2.xzxz(v3); - d = v2.ywyw(v3); -*/ -/* - GSVector4 v0 = a.upl(b); - GSVector4 v1 = a.uph(b); - GSVector4 v2 = c.upl(d); - GSVector4 v3 = c.uph(d); - - a = v0.l2h(v2); - b = v2.h2l(v0); - c = v1.l2h(v3); - d = v3.h2l(v1); -*/ } - - __forceinline GSVector4 operator - () const - { - return neg(); - } - - __forceinline void operator += (const GSVector4& v) - { - m = _mm_add_ps(m, v); - } - - __forceinline void operator -= (const GSVector4& v) - { - m = _mm_sub_ps(m, v); - } - - __forceinline void operator *= (const GSVector4& v) - { - m = _mm_mul_ps(m, v); - } - - __forceinline void operator /= (const GSVector4& v) - { - m = _mm_div_ps(m, v); - } - - __forceinline void operator += (float f) - { - *this += 
GSVector4(f); - } - - __forceinline void operator -= (float f) - { - *this -= GSVector4(f); - } - - __forceinline void operator *= (float f) - { - *this *= GSVector4(f); - } - - __forceinline void operator /= (float f) - { - *this /= GSVector4(f); - } - - __forceinline void operator &= (const GSVector4& v) - { - m = _mm_and_ps(m, v); - } - - __forceinline void operator |= (const GSVector4& v) - { - m = _mm_or_ps(m, v); - } - - __forceinline void operator ^= (const GSVector4& v) - { - m = _mm_xor_ps(m, v); - } - - __forceinline friend GSVector4 operator + (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_add_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator - (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_sub_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator * (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_mul_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator / (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_div_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator + (const GSVector4& v, float f) - { - return v + GSVector4(f); - } - - __forceinline friend GSVector4 operator - (const GSVector4& v, float f) - { - return v - GSVector4(f); - } - - __forceinline friend GSVector4 operator * (const GSVector4& v, float f) - { - return v * GSVector4(f); - } - - __forceinline friend GSVector4 operator / (const GSVector4& v, float f) - { - return v / GSVector4(f); - } - - __forceinline friend GSVector4 operator & (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_and_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator | (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_or_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator ^ (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_xor_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator == (const GSVector4& v1, const 
GSVector4& v2) - { - return GSVector4(_mm_cmpeq_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator != (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_cmpneq_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator > (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_cmpgt_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator < (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_cmplt_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator >= (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_cmpge_ps(v1, v2)); - } - - __forceinline friend GSVector4 operator <= (const GSVector4& v1, const GSVector4& v2) - { - return GSVector4(_mm_cmple_ps(v1, v2)); - } - - #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector4 xs##ys##zs##ws() const {return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector4 xs##ys##zs##ws(const GSVector4& v) const {return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - - #define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ - VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ - VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \ - VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \ - VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \ - - #define VECTOR4_SHUFFLE_2(xs, xn, ys, yn) \ - VECTOR4_SHUFFLE_3(xs, xn, ys, yn, x, 0) \ - VECTOR4_SHUFFLE_3(xs, xn, ys, yn, y, 1) \ - VECTOR4_SHUFFLE_3(xs, xn, ys, yn, z, 2) \ - VECTOR4_SHUFFLE_3(xs, xn, ys, yn, w, 3) \ - - #define VECTOR4_SHUFFLE_1(xs, xn) \ - VECTOR4_SHUFFLE_2(xs, xn, x, 0) \ - VECTOR4_SHUFFLE_2(xs, xn, y, 1) \ - VECTOR4_SHUFFLE_2(xs, xn, z, 2) \ - VECTOR4_SHUFFLE_2(xs, xn, w, 3) \ - - VECTOR4_SHUFFLE_1(x, 0) - VECTOR4_SHUFFLE_1(y, 1) - VECTOR4_SHUFFLE_1(z, 2) - VECTOR4_SHUFFLE_1(w, 3) - - #if _M_SSE >= 0x501 - - __forceinline GSVector4 broadcast32() const - { - return GSVector4(_mm_broadcastss_ps(m)); - } - - 
__forceinline static GSVector4 broadcast32(const GSVector4& v) - { - return GSVector4(_mm_broadcastss_ps(v.m)); - } - - __forceinline static GSVector4 broadcast32(const void* f) - { - return GSVector4(_mm_broadcastss_ps(_mm_load_ss((const float*)f))); - } - - #endif -}; - -#if _M_SSE >= 0x501 - -__aligned(class, 32) GSVector8i -{ - static const GSVector8i m_xff[33]; - static const GSVector8i m_x0f[33]; - -public: - union - { - struct {int x0, y0, z0, w0, x1, y1, z1, w1;}; - struct {int r0, g0, b0, a0, r1, g1, b1, a1;}; - int v[8]; - float f32[8]; - int8 i8[32]; - int16 i16[16]; - int32 i32[8]; - int64 i64[4]; - uint8 u8[32]; - uint16 u16[16]; - uint32 u32[8]; - uint64 u64[4]; - __m256i m; - __m128i m0, m1; - }; - - __forceinline GSVector8i() {} - - __forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true); - - __forceinline static GSVector8i cast(const GSVector8& v); - __forceinline static GSVector8i cast(const GSVector4& v); - __forceinline static GSVector8i cast(const GSVector4i& v); - - __forceinline GSVector8i(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) - { - m = _mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0); - } - - __forceinline GSVector8i( - short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7, - short s8, short s9, short s10, short s11, short s12, short s13, short s14, short s15) - { - m = _mm256_set_epi16(s15, s14, s13, s12, s11, s10, s9, s8, s7, s6, s5, s4, s3, s2, s1, s0); - } - - __forceinline GSVector8i( - char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, - char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15, - char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23, - char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31 - ) - { - m = _mm256_set_epi8( - b31, b30, b29, b28, b27, b26, b25, b24, b23, b22, b21, b20, b19, b18, b17, b16, - b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, 
b2, b1, b0); - } - - __forceinline GSVector8i(__m128i m0, __m128i m1) - { - #if 0 // _MSC_VER >= 1700 - - this->m = _mm256_permute2x128_si256(_mm256_castsi128_si256(m0), _mm256_castsi128_si256(m1), 0); - - #else - - *this = zero().insert<0>(m0).insert<1>(m1); - - #endif - } - - __forceinline GSVector8i(const GSVector8i& v) - { - m = v.m; - } - - __forceinline explicit GSVector8i(int i) - { - *this = i; - } - - __forceinline explicit GSVector8i(__m128i m) - { - *this = m; - } - - __forceinline explicit GSVector8i(__m256i m) - { - this->m = m; - } - - __forceinline void operator = (const GSVector8i& v) - { - m = v.m; - } - - __forceinline void operator = (int i) - { - m = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(i)); // m = _mm256_set1_epi32(i); - } - - __forceinline void operator = (__m128i m) - { - this->m = _mm256_inserti128_si256(_mm256_castsi128_si256(m), m, 1); - } - - __forceinline void operator = (__m256i m) - { - this->m = m; - } - - __forceinline operator __m256i() const - { - return m; - } - - // - - __forceinline GSVector8i sat_i8(const GSVector8i& a, const GSVector8i& b) const - { - return max_i8(a).min_i8(b); - } - - __forceinline GSVector8i sat_i8(const GSVector8i& a) const - { - return max_i8(a.xyxy()).min_i8(a.zwzw()); - } - - __forceinline GSVector8i sat_i16(const GSVector8i& a, const GSVector8i& b) const - { - return max_i16(a).min_i16(b); - } - - __forceinline GSVector8i sat_i16(const GSVector8i& a) const - { - return max_i16(a.xyxy()).min_i16(a.zwzw()); - } - - __forceinline GSVector8i sat_i32(const GSVector8i& a, const GSVector8i& b) const - { - return max_i32(a).min_i32(b); - } - - __forceinline GSVector8i sat_i32(const GSVector8i& a) const - { - return max_i32(a.xyxy()).min_i32(a.zwzw()); - } - - __forceinline GSVector8i sat_u8(const GSVector8i& a, const GSVector8i& b) const - { - return max_u8(a).min_u8(b); - } - - __forceinline GSVector8i sat_u8(const GSVector8i& a) const - { - return max_u8(a.xyxy()).min_u8(a.zwzw()); - } - - __forceinline 
GSVector8i sat_u16(const GSVector8i& a, const GSVector8i& b) const - { - return max_u16(a).min_u16(b); - } - - __forceinline GSVector8i sat_u16(const GSVector8i& a) const - { - return max_u16(a.xyxy()).min_u16(a.zwzw()); - } - - __forceinline GSVector8i sat_u32(const GSVector8i& a, const GSVector8i& b) const - { - return max_u32(a).min_u32(b); - } - - __forceinline GSVector8i sat_u32(const GSVector8i& a) const - { - return max_u32(a.xyxy()).min_u32(a.zwzw()); - } - - __forceinline GSVector8i min_i8(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epi8(m, a)); - } - - __forceinline GSVector8i max_i8(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epi8(m, a)); - } - - __forceinline GSVector8i min_i16(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epi16(m, a)); - } - - __forceinline GSVector8i max_i16(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epi16(m, a)); - } - - __forceinline GSVector8i min_i32(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epi32(m, a)); - } - - __forceinline GSVector8i max_i32(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epi32(m, a)); - } - - __forceinline GSVector8i min_u8(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epu8(m, a)); - } - - __forceinline GSVector8i max_u8(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epu8(m, a)); - } - - __forceinline GSVector8i min_u16(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epu16(m, a)); - } - - __forceinline GSVector8i max_u16(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epu16(m, a)); - } - - __forceinline GSVector8i min_u32(const GSVector8i& a) const - { - return GSVector8i(_mm256_min_epu32(m, a)); - } - - __forceinline GSVector8i max_u32(const GSVector8i& a) const - { - return GSVector8i(_mm256_max_epu32(m, a)); - } - - __forceinline GSVector8i clamp8() const - { - return pu16().upl8(); - } - - __forceinline GSVector8i blend8(const GSVector8i& a, 
const GSVector8i& mask) const - { - return GSVector8i(_mm256_blendv_epi8(m, a, mask)); - } - - template __forceinline GSVector8i blend16(const GSVector8i& a) const - { - return GSVector8i(_mm256_blend_epi16(m, a, mask)); - } - - __forceinline GSVector8i blend(const GSVector8i& a, const GSVector8i& mask) const - { - return GSVector8i(_mm256_or_si256(_mm256_andnot_si256(mask, m), _mm256_and_si256(mask, a))); - } - - __forceinline GSVector8i mix16(const GSVector8i& a) const - { - return blend16<0xaa>(a); - } - - __forceinline GSVector8i shuffle8(const GSVector8i& mask) const - { - return GSVector8i(_mm256_shuffle_epi8(m, mask)); - } - - __forceinline GSVector8i ps16(const GSVector8i& a) const - { - return GSVector8i(_mm256_packs_epi16(m, a)); - } - - __forceinline GSVector8i ps16() const - { - return GSVector8i(_mm256_packs_epi16(m, m)); - } - - __forceinline GSVector8i pu16(const GSVector8i& a) const - { - return GSVector8i(_mm256_packus_epi16(m, a)); - } - - __forceinline GSVector8i pu16() const - { - return GSVector8i(_mm256_packus_epi16(m, m)); - } - - __forceinline GSVector8i ps32(const GSVector8i& a) const - { - return GSVector8i(_mm256_packs_epi32(m, a)); - } - - __forceinline GSVector8i ps32() const - { - return GSVector8i(_mm256_packs_epi32(m, m)); - } - - __forceinline GSVector8i pu32(const GSVector8i& a) const - { - return GSVector8i(_mm256_packus_epi32(m, a)); - } - - __forceinline GSVector8i pu32() const - { - return GSVector8i(_mm256_packus_epi32(m, m)); - } - - __forceinline GSVector8i upl8(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpacklo_epi8(m, a)); - } - - __forceinline GSVector8i uph8(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpackhi_epi8(m, a)); - } - - __forceinline GSVector8i upl16(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpacklo_epi16(m, a)); - } - - __forceinline GSVector8i uph16(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpackhi_epi16(m, a)); - } - - __forceinline GSVector8i 
upl32(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpacklo_epi32(m, a)); - } - - __forceinline GSVector8i uph32(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpackhi_epi32(m, a)); - } - - __forceinline GSVector8i upl64(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpacklo_epi64(m, a)); - } - - __forceinline GSVector8i uph64(const GSVector8i& a) const - { - return GSVector8i(_mm256_unpackhi_epi64(m, a)); - } - - __forceinline GSVector8i upl8() const - { - return GSVector8i(_mm256_unpacklo_epi8(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i uph8() const - { - return GSVector8i(_mm256_unpackhi_epi8(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i upl16() const - { - return GSVector8i(_mm256_unpacklo_epi16(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i uph16() const - { - return GSVector8i(_mm256_unpackhi_epi16(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i upl32() const - { - return GSVector8i(_mm256_unpacklo_epi32(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i uph32() const - { - return GSVector8i(_mm256_unpackhi_epi32(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i upl64() const - { - return GSVector8i(_mm256_unpacklo_epi64(m, _mm256_setzero_si256())); - } - - __forceinline GSVector8i uph64() const - { - return GSVector8i(_mm256_unpackhi_epi64(m, _mm256_setzero_si256())); - } - - // cross lane! 
from 128-bit to full 256-bit range - - __forceinline GSVector8i i8to16c() const - { - return GSVector8i(_mm256_cvtepi8_epi16(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u8to16c() const - { - return GSVector8i(_mm256_cvtepu8_epi16(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i i8to32c() const - { - return GSVector8i(_mm256_cvtepi8_epi32(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u8to32c() const - { - return GSVector8i(_mm256_cvtepu8_epi32(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i i8to64c() const - { - return GSVector8i(_mm256_cvtepi8_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u8to64c() const - { - return GSVector8i(_mm256_cvtepu16_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i i16to32c() const - { - return GSVector8i(_mm256_cvtepi16_epi32(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u16to32c() const - { - return GSVector8i(_mm256_cvtepu16_epi32(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i i16to64c() const - { - return GSVector8i(_mm256_cvtepi16_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u16to64c() const - { - return GSVector8i(_mm256_cvtepu16_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i i32to64c() const - { - return GSVector8i(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i u32to64c() const - { - return GSVector8i(_mm256_cvtepu32_epi64(_mm256_castsi256_si128(m))); - } - - // - - static __forceinline GSVector8i i8to16c(const void* p) - { - return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p))); - } - - static __forceinline GSVector8i u8to16c(const void* p) - { - return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p))); - } - - static __forceinline GSVector8i i8to32c(const void* p) - { - return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p))); - } - - static __forceinline GSVector8i u8to32c(const 
void* p) - { - return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p))); - } - - static __forceinline GSVector8i i8to64c(int i) - { - return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i))); - } - - static __forceinline GSVector8i u8to64c(int i) - { - return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i))); - } - - static __forceinline GSVector8i i16to32c(const void* p) - { - return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p))); - } - - static __forceinline GSVector8i u16to32c(const void* p) - { - return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p))); - } - - static __forceinline GSVector8i i16to64c(const void* p) - { - return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p))); - } - - static __forceinline GSVector8i u16to64c(const void* p) - { - return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p))); - } - - static __forceinline GSVector8i i32to64c(const void* p) - { - return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p))); - } - - static __forceinline GSVector8i u32to64c(const void* p) - { - return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p))); - } - - // - - template __forceinline GSVector8i srl() const - { - return GSVector8i(_mm256_srli_si256(m, i)); - } - - template __forceinline GSVector8i srl(const GSVector8i& v) - { - return GSVector8i(_mm256_alignr_epi8(v.m, m, i)); - } - - template __forceinline GSVector8i sll() const - { - return GSVector8i(_mm256_slli_si256(m, i)); - //return GSVector8i(_mm256_slli_si128(m, i)); - } - - __forceinline GSVector8i sra16(int i) const - { - return GSVector8i(_mm256_srai_epi16(m, i)); - } - - __forceinline GSVector8i sra16(__m128i i) const - { - return GSVector8i(_mm256_sra_epi16(m, i)); - } - - __forceinline GSVector8i sra16(__m256i i) const - { - return GSVector8i(_mm256_sra_epi16(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i sra32(int i) const - { - return 
GSVector8i(_mm256_srai_epi32(m, i)); - } - - __forceinline GSVector8i sra32(__m128i i) const - { - return GSVector8i(_mm256_sra_epi32(m, i)); - } - - __forceinline GSVector8i sra32(__m256i i) const - { - return GSVector8i(_mm256_sra_epi32(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srav32(__m256i i) const - { - return GSVector8i(_mm256_srav_epi32(m, i)); - } - - __forceinline GSVector8i sll16(int i) const - { - return GSVector8i(_mm256_slli_epi16(m, i)); - } - - __forceinline GSVector8i sll16(__m128i i) const - { - return GSVector8i(_mm256_sll_epi16(m, i)); - } - - __forceinline GSVector8i sll16(__m256i i) const - { - return GSVector8i(_mm256_sll_epi16(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i sll32(int i) const - { - return GSVector8i(_mm256_slli_epi32(m, i)); - } - - __forceinline GSVector8i sll32(__m128i i) const - { - return GSVector8i(_mm256_sll_epi32(m, i)); - } - - __forceinline GSVector8i sll32(__m256i i) const - { - return GSVector8i(_mm256_sll_epi32(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i sllv32(__m256i i) const - { - return GSVector8i(_mm256_sllv_epi32(m, i)); - } - - __forceinline GSVector8i sll64(int i) const - { - return GSVector8i(_mm256_slli_epi64(m, i)); - } - - __forceinline GSVector8i sll64(__m128i i) const - { - return GSVector8i(_mm256_sll_epi64(m, i)); - } - - __forceinline GSVector8i sll64(__m256i i) const - { - return GSVector8i(_mm256_sll_epi64(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i sllv64(__m256i i) const - { - return GSVector8i(_mm256_sllv_epi64(m, i)); - } - - __forceinline GSVector8i srl16(int i) const - { - return GSVector8i(_mm256_srli_epi16(m, i)); - } - - __forceinline GSVector8i srl16(__m128i i) const - { - return GSVector8i(_mm256_srl_epi16(m, i)); - } - - __forceinline GSVector8i srl16(__m256i i) const - { - return GSVector8i(_mm256_srl_epi16(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srl32(int i) const - { - 
return GSVector8i(_mm256_srli_epi32(m, i)); - } - - __forceinline GSVector8i srl32(__m128i i) const - { - return GSVector8i(_mm256_srl_epi32(m, i)); - } - - __forceinline GSVector8i srl32(__m256i i) const - { - return GSVector8i(_mm256_srl_epi32(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srlv32(__m256i i) const - { - return GSVector8i(_mm256_srlv_epi32(m, i)); - } - - __forceinline GSVector8i srl64(int i) const - { - return GSVector8i(_mm256_srli_epi64(m, i)); - } - - __forceinline GSVector8i srl64(__m128i i) const - { - return GSVector8i(_mm256_srl_epi64(m, i)); - } - - __forceinline GSVector8i srl64(__m256i i) const - { - return GSVector8i(_mm256_srl_epi64(m, _mm256_castsi256_si128(i))); - } - - __forceinline GSVector8i srlv64(__m256i i) const - { - return GSVector8i(_mm256_srlv_epi64(m, i)); - } - - __forceinline GSVector8i add8(const GSVector8i& v) const - { - return GSVector8i(_mm256_add_epi8(m, v.m)); - } - - __forceinline GSVector8i add16(const GSVector8i& v) const - { - return GSVector8i(_mm256_add_epi16(m, v.m)); - } - - __forceinline GSVector8i add32(const GSVector8i& v) const - { - return GSVector8i(_mm256_add_epi32(m, v.m)); - } - - __forceinline GSVector8i adds8(const GSVector8i& v) const - { - return GSVector8i(_mm256_adds_epi8(m, v.m)); - } - - __forceinline GSVector8i adds16(const GSVector8i& v) const - { - return GSVector8i(_mm256_adds_epi16(m, v.m)); - } - - __forceinline GSVector8i addus8(const GSVector8i& v) const - { - return GSVector8i(_mm256_adds_epu8(m, v.m)); - } - - __forceinline GSVector8i addus16(const GSVector8i& v) const - { - return GSVector8i(_mm256_adds_epu16(m, v.m)); - } - - __forceinline GSVector8i sub8(const GSVector8i& v) const - { - return GSVector8i(_mm256_sub_epi8(m, v.m)); - } - - __forceinline GSVector8i sub16(const GSVector8i& v) const - { - return GSVector8i(_mm256_sub_epi16(m, v.m)); - } - - __forceinline GSVector8i sub32(const GSVector8i& v) const - { - return GSVector8i(_mm256_sub_epi32(m, v.m)); 
- } - - __forceinline GSVector8i subs8(const GSVector8i& v) const - { - return GSVector8i(_mm256_subs_epi8(m, v.m)); - } - - __forceinline GSVector8i subs16(const GSVector8i& v) const - { - return GSVector8i(_mm256_subs_epi16(m, v.m)); - } - - __forceinline GSVector8i subus8(const GSVector8i& v) const - { - return GSVector8i(_mm256_subs_epu8(m, v.m)); - } - - __forceinline GSVector8i subus16(const GSVector8i& v) const - { - return GSVector8i(_mm256_subs_epu16(m, v.m)); - } - - __forceinline GSVector8i avg8(const GSVector8i& v) const - { - return GSVector8i(_mm256_avg_epu8(m, v.m)); - } - - __forceinline GSVector8i avg16(const GSVector8i& v) const - { - return GSVector8i(_mm256_avg_epu16(m, v.m)); - } - - __forceinline GSVector8i mul16hs(const GSVector8i& v) const - { - return GSVector8i(_mm256_mulhi_epi16(m, v.m)); - } - - __forceinline GSVector8i mul16hu(const GSVector8i& v) const - { - return GSVector8i(_mm256_mulhi_epu16(m, v.m)); - } - - __forceinline GSVector8i mul16l(const GSVector8i& v) const - { - return GSVector8i(_mm256_mullo_epi16(m, v.m)); - } - - __forceinline GSVector8i mul16hrs(const GSVector8i& v) const - { - return GSVector8i(_mm256_mulhrs_epi16(m, v.m)); - } - - GSVector8i madd(const GSVector8i& v) const - { - return GSVector8i(_mm256_madd_epi16(m, v.m)); - } - - template __forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const - { - // (a - this) * f << shift + this - - return add16(a.sub16(*this).modulate16(f)); - } - - template __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c) - { - // (a - b) * c << shift - - return a.sub16(b).modulate16(c); - } - - template __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d) - { - // (a - b) * c << shift + d - - return d.add16(a.sub16(b).modulate16(c)); - } - - __forceinline GSVector8i lerp16_4(const GSVector8i& a, const GSVector8i& f) const - { - // (a - this) * f 
>> 4 + this (a, this: 8-bit, f: 4-bit) - - return add16(a.sub16(*this).mul16l(f).sra16(4)); - } - - template __forceinline GSVector8i modulate16(const GSVector8i& f) const - { - // a * f << shift - - if(shift == 0) - { - return mul16hrs(f); - } - - return sll16(shift + 1).mul16hs(f); - } - - __forceinline bool eq(const GSVector8i& v) const - { - GSVector8i t = *this ^ v; - - return _mm256_testz_si256(t, t) != 0; - } - - __forceinline GSVector8i eq8(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpeq_epi8(m, v.m)); - } - - __forceinline GSVector8i eq16(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpeq_epi16(m, v.m)); - } - - __forceinline GSVector8i eq32(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpeq_epi32(m, v.m)); - } - - __forceinline GSVector8i neq8(const GSVector8i& v) const - { - return ~eq8(v); - } - - __forceinline GSVector8i neq16(const GSVector8i& v) const - { - return ~eq16(v); - } - - __forceinline GSVector8i neq32(const GSVector8i& v) const - { - return ~eq32(v); - } - - __forceinline GSVector8i gt8(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi8(m, v.m)); - } - - __forceinline GSVector8i gt16(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi16(m, v.m)); - } - - __forceinline GSVector8i gt32(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi32(m, v.m)); - } - - __forceinline GSVector8i lt8(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi8(v.m, m)); - } - - __forceinline GSVector8i lt16(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi16(v.m, m)); - } - - __forceinline GSVector8i lt32(const GSVector8i& v) const - { - return GSVector8i(_mm256_cmpgt_epi32(v.m, m)); - } - - __forceinline GSVector8i andnot(const GSVector8i& v) const - { - return GSVector8i(_mm256_andnot_si256(v.m, m)); - } - - __forceinline int mask() const - { - return _mm256_movemask_epi8(m); - } - - __forceinline bool alltrue() const - { - return mask() 
== (int)0xffffffff; - } - - __forceinline bool allfalse() const - { - return _mm256_testz_si256(m, m) != 0; - } - - // TODO: extract/insert - - template __forceinline int extract8() const - { - ASSERT(i < 32); - - GSVector4i v = extract(); - - return v.extract8(); - } - - template __forceinline int extract16() const - { - ASSERT(i < 16); - - GSVector4i v = extract(); - - return v.extract16(); - } - - template __forceinline int extract32() const - { - ASSERT(i < 8); - - GSVector4i v = extract(); - - if((i & 3) == 0) return GSVector4i::store(v); - - return v.extract32(); - } - - template __forceinline GSVector4i extract() const - { - ASSERT(i < 2); - - if(i == 0) return GSVector4i(_mm256_castsi256_si128(m)); - - return GSVector4i(_mm256_extracti128_si256(m, i)); - } - - template __forceinline GSVector8i insert(__m128i m) const - { - ASSERT(i < 2); - - return GSVector8i(_mm256_inserti128_si256(this->m, m, i)); - } - - // TODO: gather - - template __forceinline GSVector8i gather32_32(const T* ptr) const - { - GSVector4i v0; - GSVector4i v1; - - GSVector4i a0 = extract<0>(); - GSVector4i a1 = extract<1>(); - - v0 = GSVector4i::load((int)ptr[a0.extract32<0>()]); - v0 = v0.insert32<1>((int)ptr[a0.extract32<1>()]); - v0 = v0.insert32<2>((int)ptr[a0.extract32<2>()]); - v0 = v0.insert32<3>((int)ptr[a0.extract32<3>()]); - - v1 = GSVector4i::load((int)ptr[a1.extract32<0>()]); - v1 = v1.insert32<1>((int)ptr[a1.extract32<1>()]); - v1 = v1.insert32<2>((int)ptr[a1.extract32<2>()]); - v1 = v1.insert32<3>((int)ptr[a1.extract32<3>()]); - - return cast(v0).insert<1>(v1); - } - - __forceinline GSVector8i gather32_32(const uint8* ptr) const - { - return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff(); - } - - __forceinline GSVector8i gather32_32(const uint16* ptr) const - { - return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff(); - } - - __forceinline GSVector8i gather32_32(const uint32* ptr) const - { - return 
GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4)); - } - - template __forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const - { - GSVector4i v0; - GSVector4i v1; - - GSVector4i a0 = extract<0>(); - GSVector4i a1 = extract<1>(); - - v0 = GSVector4i::load((int)ptr2[ptr1[a0.extract32<0>()]]); - v0 = v0.insert32<1>((int)ptr2[ptr1[a0.extract32<1>()]]); - v0 = v0.insert32<2>((int)ptr2[ptr1[a0.extract32<2>()]]); - v0 = v0.insert32<3>((int)ptr2[ptr1[a0.extract32<3>()]]); - - v1 = GSVector4i::load((int)ptr2[ptr1[a1.extract32<0>()]]); - v1 = v1.insert32<1>((int)ptr2[ptr1[a1.extract32<1>()]]); - v1 = v1.insert32<2>((int)ptr2[ptr1[a1.extract32<2>()]]); - v1 = v1.insert32<3>((int)ptr2[ptr1[a1.extract32<3>()]]); - - return cast(v0).insert<1>(v1); - } - - __forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const - { - return gather32_32(ptr1).gather32_32(ptr2); - } - - __forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const - { - return gather32_32(ptr1).gather32_32(ptr2); - } - - template __forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const - { - dst[0] = gather32_32<>(ptr); - } - - // - - __forceinline static GSVector8i loadnt(const void* p) - { - return GSVector8i(_mm256_stream_load_si256((__m256i*)p)); - } - - __forceinline static GSVector8i loadl(const void* p) - { - return GSVector8i(_mm256_castsi128_si256(_mm_load_si128((__m128i*)p))); - } - - __forceinline static GSVector8i loadh(const void* p) - { - return GSVector8i(_mm256_inserti128_si256(_mm256_setzero_si256(), _mm_load_si128((__m128i*)p), 1)); - - /* TODO: this may be faster - __m256i m = _mm256_castsi128_si256(_mm_load_si128((__m128i*)p)); - return GSVector8i(_mm256_permute2x128_si256(m, m, 0x08)); - */ - } - - __forceinline static GSVector8i loadh(const void* p, const GSVector8i& v) - { - return GSVector8i(_mm256_inserti128_si256(v, _mm_load_si128((__m128i*)p), 1)); - } - - __forceinline static GSVector8i 
load(const void* pl, const void* ph) - { - return loadh(ph, loadl(pl)); - - /* TODO: this may be faster - __m256 m0 = _mm256_castsi128_si256(_mm_load_si128((__m128*)pl)); - __m256 m1 = _mm256_castsi128_si256(_mm_load_si128((__m128*)ph)); - return GSVector8i(_mm256_permute2x128_si256(m0, m1, 0x20)); - */ - } - - __forceinline static GSVector8i load(const void* pll, const void* plh, const void* phl, const void* phh) - { - GSVector4i l = GSVector4i::load(pll, plh); - GSVector4i h = GSVector4i::load(phl, phh); - - return cast(l).ac(cast(h)); - - // return GSVector8i(l).insert<1>(h); - } - - template __forceinline static GSVector8i load(const void* p) - { - return GSVector8i(aligned ? _mm256_load_si256((__m256i*)p) : _mm256_loadu_si256((__m256i*)p)); - } - - __forceinline static GSVector8i load(int i) - { - return cast(GSVector4i::load(i)); - } - - #ifdef _M_AMD64 - - __forceinline static GSVector8i loadq(int64 i) - { - return cast(GSVector4i::loadq(i)); - } - - #endif - - __forceinline static void storent(void* p, const GSVector8i& v) - { - _mm256_stream_si256((__m256i*)p, v.m); - } - - __forceinline static void storel(void* p, const GSVector8i& v) - { - _mm_store_si128((__m128i*)p, _mm256_extracti128_si256(v.m, 0)); - } - - __forceinline static void storeh(void* p, const GSVector8i& v) - { - _mm_store_si128((__m128i*)p, _mm256_extracti128_si256(v.m, 1)); - } - - __forceinline static void store(void* pl, void* ph, const GSVector8i& v) - { - GSVector8i::storel(pl, v); - GSVector8i::storeh(ph, v); - } - - template __forceinline static void store(void* p, const GSVector8i& v) - { - if(aligned) _mm256_store_si256((__m256i*)p, v.m); - else _mm256_storeu_si256((__m256i*)p, v.m); - } - - __forceinline static int store(const GSVector8i& v) - { - return GSVector4i::store(GSVector4i::cast(v)); - } - - #ifdef _M_AMD64 - - __forceinline static int64 storeq(const GSVector8i& v) - { - return GSVector4i::storeq(GSVector4i::cast(v)); - } - - #endif - - __forceinline static void 
storent(void* RESTRICT dst, const void* RESTRICT src, size_t size) - { - const GSVector8i* s = (const GSVector8i*)src; - GSVector8i* d = (GSVector8i*)dst; - - if(size == 0) return; - - size_t i = 0; - size_t j = size >> 7; - - for(; i < j; i++, s += 4, d += 4) - { - storent(&d[0], s[0]); - storent(&d[1], s[1]); - storent(&d[2], s[2]); - storent(&d[3], s[3]); - } - - size &= 127; - - if(size == 0) return; - - memcpy(d, s, size); - } - - // TODO: swizzling - - __forceinline static void sw8(GSVector8i& a, GSVector8i& b) - { - GSVector8i c = a; - GSVector8i d = b; - - a = c.upl8(d); - b = c.uph8(d); - } - - __forceinline static void sw16(GSVector8i& a, GSVector8i& b) - { - GSVector8i c = a; - GSVector8i d = b; - - a = c.upl16(d); - b = c.uph16(d); - } - - __forceinline static void sw32(GSVector8i& a, GSVector8i& b) - { - GSVector8i c = a; - GSVector8i d = b; - - a = c.upl32(d); - b = c.uph32(d); - } - - __forceinline static void sw64(GSVector8i& a, GSVector8i& b) - { - GSVector8i c = a; - GSVector8i d = b; - - a = c.upl64(d); - b = c.uph64(d); - } - - __forceinline static void sw128(GSVector8i& a, GSVector8i& b) - { - GSVector8i c = a; - GSVector8i d = b; - - a = c.ac(d); - b = c.bd(d); - } - - __forceinline static void sw4(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - const __m256i epi32_0f0f0f0f = _mm256_set1_epi32(0x0f0f0f0f); - - GSVector8i mask(epi32_0f0f0f0f); - - GSVector8i e = (b << 4).blend(a, mask); - GSVector8i f = b.blend(a >> 4, mask); - GSVector8i g = (d << 4).blend(c, mask); - GSVector8i h = d.blend(c >> 4, mask); - - a = e.upl8(f); - c = e.uph8(f); - b = g.upl8(h); - d = g.uph8(h); - } - - __forceinline static void sw8(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - GSVector8i e = a; - GSVector8i f = c; - - a = e.upl8(b); - c = e.uph8(b); - b = f.upl8(d); - d = f.uph8(d); - } - - __forceinline static void sw16(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - GSVector8i e = a; - GSVector8i f = c; - - 
a = e.upl16(b); - c = e.uph16(b); - b = f.upl16(d); - d = f.uph16(d); - } - - __forceinline static void sw32(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - GSVector8i e = a; - GSVector8i f = c; - - a = e.upl32(b); - c = e.uph32(b); - b = f.upl32(d); - d = f.uph32(d); - } - - __forceinline static void sw64(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - GSVector8i e = a; - GSVector8i f = c; - - a = e.upl64(b); - c = e.uph64(b); - b = f.upl64(d); - d = f.uph64(d); - } - - __forceinline static void sw128(GSVector8i& a, GSVector8i& b, GSVector8i& c, GSVector8i& d) - { - GSVector8i e = a; - GSVector8i f = c; - - a = e.ac(b); - c = e.bd(b); - b = f.ac(d); - d = f.bd(d); - } - - __forceinline void operator += (const GSVector8i& v) - { - m = _mm256_add_epi32(m, v); - } - - __forceinline void operator -= (const GSVector8i& v) - { - m = _mm256_sub_epi32(m, v); - } - - __forceinline void operator += (int i) - { - *this += GSVector8i(i); - } - - __forceinline void operator -= (int i) - { - *this -= GSVector8i(i); - } - - __forceinline void operator <<= (const int i) - { - m = _mm256_slli_epi32(m, i); - } - - __forceinline void operator >>= (const int i) - { - m = _mm256_srli_epi32(m, i); - } - - __forceinline void operator &= (const GSVector8i& v) - { - m = _mm256_and_si256(m, v); - } - - __forceinline void operator |= (const GSVector8i& v) - { - m = _mm256_or_si256(m, v); - } - - __forceinline void operator ^= (const GSVector8i& v) - { - m = _mm256_xor_si256(m, v); - } - - __forceinline friend GSVector8i operator + (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_add_epi32(v1, v2)); - } - - __forceinline friend GSVector8i operator - (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_sub_epi32(v1, v2)); - } - - __forceinline friend GSVector8i operator + (const GSVector8i& v, int i) - { - return v + GSVector8i(i); - } - - __forceinline friend GSVector8i operator - (const GSVector8i& v, int 
i) - { - return v - GSVector8i(i); - } - - __forceinline friend GSVector8i operator << (const GSVector8i& v, const int i) - { - return GSVector8i(_mm256_slli_epi32(v, i)); - } - - __forceinline friend GSVector8i operator >> (const GSVector8i& v, const int i) - { - return GSVector8i(_mm256_srli_epi32(v, i)); - } - - __forceinline friend GSVector8i operator & (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_and_si256(v1, v2)); - } - - __forceinline friend GSVector8i operator | (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_or_si256(v1, v2)); - } - - __forceinline friend GSVector8i operator ^ (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_xor_si256(v1, v2)); - } - - __forceinline friend GSVector8i operator & (const GSVector8i& v, int i) - { - return v & GSVector8i(i); - } - - __forceinline friend GSVector8i operator | (const GSVector8i& v, int i) - { - return v | GSVector8i(i); - } - - __forceinline friend GSVector8i operator ^ (const GSVector8i& v, int i) - { - return v ^ GSVector8i(i); - } - - __forceinline friend GSVector8i operator ~ (const GSVector8i& v) - { - return v ^ (v == v); - } - - __forceinline friend GSVector8i operator == (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_cmpeq_epi32(v1, v2)); - } - - __forceinline friend GSVector8i operator != (const GSVector8i& v1, const GSVector8i& v2) - { - return ~(v1 == v2); - } - - __forceinline friend GSVector8i operator > (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_cmpgt_epi32(v1, v2)); - } - - __forceinline friend GSVector8i operator < (const GSVector8i& v1, const GSVector8i& v2) - { - return GSVector8i(_mm256_cmpgt_epi32(v2, v1)); - } - - __forceinline friend GSVector8i operator >= (const GSVector8i& v1, const GSVector8i& v2) - { - return (v1 > v2) | (v1 == v2); - } - - __forceinline friend GSVector8i operator <= (const GSVector8i& v1, const GSVector8i& v2) - { - return 
(v1 < v2) | (v1 == v2); - } - - // x = v[31:0] / v[159:128] - // y = v[63:32] / v[191:160] - // z = v[95:64] / v[223:192] - // w = v[127:96] / v[255:224] - - #define VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector8i xs##ys##zs##ws() const {return GSVector8i(_mm256_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##l() const {return GSVector8i(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##h() const {return GSVector8i(_mm256_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##lh() const {return GSVector8i(_mm256_shufflehi_epi16(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn)));} \ - - #define VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ - VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ - VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \ - VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \ - VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \ - - #define VECTOR8i_SHUFFLE_2(xs, xn, ys, yn) \ - VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, x, 0) \ - VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, y, 1) \ - VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, z, 2) \ - VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, w, 3) \ - - #define VECTOR8i_SHUFFLE_1(xs, xn) \ - VECTOR8i_SHUFFLE_2(xs, xn, x, 0) \ - VECTOR8i_SHUFFLE_2(xs, xn, y, 1) \ - VECTOR8i_SHUFFLE_2(xs, xn, z, 2) \ - VECTOR8i_SHUFFLE_2(xs, xn, w, 3) \ - - VECTOR8i_SHUFFLE_1(x, 0) - VECTOR8i_SHUFFLE_1(y, 1) - VECTOR8i_SHUFFLE_1(z, 2) - VECTOR8i_SHUFFLE_1(w, 3) - - // a = v0[127:0] - // b = v0[255:128] - // c = v1[127:0] - // d = v1[255:128] - // _ = 0 - - #define VECTOR8i_PERMUTE128_2(as, an, bs, bn) \ - __forceinline GSVector8i as##bs() const {return GSVector8i(_mm256_permute2x128_si256(m, m, an | (bn << 4)));} \ - __forceinline GSVector8i as##bs(const GSVector8i& v) const {return GSVector8i(_mm256_permute2x128_si256(m, v.m, an | (bn << 4)));} \ - - #define 
VECTOR8i_PERMUTE128_1(as, an) \ - VECTOR8i_PERMUTE128_2(as, an, a, 0) \ - VECTOR8i_PERMUTE128_2(as, an, b, 1) \ - VECTOR8i_PERMUTE128_2(as, an, c, 2) \ - VECTOR8i_PERMUTE128_2(as, an, d, 3) \ - VECTOR8i_PERMUTE128_2(as, an, _, 8) \ - - VECTOR8i_PERMUTE128_1(a, 0) - VECTOR8i_PERMUTE128_1(b, 1) - VECTOR8i_PERMUTE128_1(c, 2) - VECTOR8i_PERMUTE128_1(d, 3) - VECTOR8i_PERMUTE128_1(_, 8) - - // a = v[63:0] - // b = v[127:64] - // c = v[191:128] - // d = v[255:192] - - #define VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \ - __forceinline GSVector8i as##bs##cs##ds() const {return GSVector8i(_mm256_permute4x64_epi64(m, _MM_SHUFFLE(dn, cn, bn, an)));} \ - - #define VECTOR8i_PERMUTE64_3(as, an, bs, bn, cs, cn) \ - VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \ - VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, b, 1) \ - VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, c, 2) \ - VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, d, 3) \ - - #define VECTOR8i_PERMUTE64_2(as, an, bs, bn) \ - VECTOR8i_PERMUTE64_3(as, an, bs, bn, a, 0) \ - VECTOR8i_PERMUTE64_3(as, an, bs, bn, b, 1) \ - VECTOR8i_PERMUTE64_3(as, an, bs, bn, c, 2) \ - VECTOR8i_PERMUTE64_3(as, an, bs, bn, d, 3) \ - - #define VECTOR8i_PERMUTE64_1(as, an) \ - VECTOR8i_PERMUTE64_2(as, an, a, 0) \ - VECTOR8i_PERMUTE64_2(as, an, b, 1) \ - VECTOR8i_PERMUTE64_2(as, an, c, 2) \ - VECTOR8i_PERMUTE64_2(as, an, d, 3) \ - - VECTOR8i_PERMUTE64_1(a, 0) - VECTOR8i_PERMUTE64_1(b, 1) - VECTOR8i_PERMUTE64_1(c, 2) - VECTOR8i_PERMUTE64_1(d, 3) - - __forceinline GSVector8i permute32(const GSVector8i& mask) const - { - return GSVector8i(_mm256_permutevar8x32_epi32(m, mask)); - } - - __forceinline GSVector8i broadcast8() const - { - return GSVector8i(_mm256_broadcastb_epi8(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i broadcast16() const - { - return GSVector8i(_mm256_broadcastw_epi16(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i broadcast32() const - { - return 
GSVector8i(_mm256_broadcastd_epi32(_mm256_castsi256_si128(m))); - } - - __forceinline GSVector8i broadcast64() const - { - return GSVector8i(_mm256_broadcastq_epi64(_mm256_castsi256_si128(m))); - } - - __forceinline static GSVector8i broadcast8(const GSVector4i& v) - { - return GSVector8i(_mm256_broadcastb_epi8(v.m)); - } - - __forceinline static GSVector8i broadcast16(const GSVector4i& v) - { - return GSVector8i(_mm256_broadcastw_epi16(v.m)); - } - - __forceinline static GSVector8i broadcast32(const GSVector4i& v) - { - return GSVector8i(_mm256_broadcastd_epi32(v.m)); - } - - __forceinline static GSVector8i broadcast64(const GSVector4i& v) - { - return GSVector8i(_mm256_broadcastq_epi64(v.m)); - } - - __forceinline static GSVector8i broadcast128(const GSVector4i& v) - { - // this one only has m128 source op, it will be saved to a temp on stack if the compiler is not smart enough and use the address of v directly (<= vs2012u3rc2) - - return GSVector8i(_mm256_broadcastsi128_si256(v)); // fastest - //return GSVector8i(v); // almost as fast as broadcast - //return cast(v).insert<1>(v); // slow - //return cast(v).aa(); // slowest - } - - __forceinline static GSVector8i broadcast8(const void* p) - { - return GSVector8i(_mm256_broadcastb_epi8(_mm_cvtsi32_si128(*(const int*)p))); - } - - __forceinline static GSVector8i broadcast16(const void* p) - { - return GSVector8i(_mm256_broadcastw_epi16(_mm_cvtsi32_si128(*(const int*)p))); - } - - __forceinline static GSVector8i broadcast32(const void* p) - { - return GSVector8i(_mm256_broadcastd_epi32(_mm_cvtsi32_si128(*(const int*)p))); - } - - __forceinline static GSVector8i broadcast64(const void* p) - { - return GSVector8i(_mm256_broadcastq_epi64(_mm_loadl_epi64((const __m128i*)p))); - } - - __forceinline static GSVector8i broadcast128(const void* p) - { - return GSVector8i(_mm256_broadcastsi128_si256(*(const __m128i*)p)); - } - - __forceinline static GSVector8i zero() {return GSVector8i(_mm256_setzero_si256());} - - 
__forceinline static GSVector8i xffffffff() {return zero() == zero();} - - __forceinline static GSVector8i x00000001() {return xffffffff().srl32(31);} - __forceinline static GSVector8i x00000003() {return xffffffff().srl32(30);} - __forceinline static GSVector8i x00000007() {return xffffffff().srl32(29);} - __forceinline static GSVector8i x0000000f() {return xffffffff().srl32(28);} - __forceinline static GSVector8i x0000001f() {return xffffffff().srl32(27);} - __forceinline static GSVector8i x0000003f() {return xffffffff().srl32(26);} - __forceinline static GSVector8i x0000007f() {return xffffffff().srl32(25);} - __forceinline static GSVector8i x000000ff() {return xffffffff().srl32(24);} - __forceinline static GSVector8i x000001ff() {return xffffffff().srl32(23);} - __forceinline static GSVector8i x000003ff() {return xffffffff().srl32(22);} - __forceinline static GSVector8i x000007ff() {return xffffffff().srl32(21);} - __forceinline static GSVector8i x00000fff() {return xffffffff().srl32(20);} - __forceinline static GSVector8i x00001fff() {return xffffffff().srl32(19);} - __forceinline static GSVector8i x00003fff() {return xffffffff().srl32(18);} - __forceinline static GSVector8i x00007fff() {return xffffffff().srl32(17);} - __forceinline static GSVector8i x0000ffff() {return xffffffff().srl32(16);} - __forceinline static GSVector8i x0001ffff() {return xffffffff().srl32(15);} - __forceinline static GSVector8i x0003ffff() {return xffffffff().srl32(14);} - __forceinline static GSVector8i x0007ffff() {return xffffffff().srl32(13);} - __forceinline static GSVector8i x000fffff() {return xffffffff().srl32(12);} - __forceinline static GSVector8i x001fffff() {return xffffffff().srl32(11);} - __forceinline static GSVector8i x003fffff() {return xffffffff().srl32(10);} - __forceinline static GSVector8i x007fffff() {return xffffffff().srl32( 9);} - __forceinline static GSVector8i x00ffffff() {return xffffffff().srl32( 8);} - __forceinline static GSVector8i x01ffffff() {return 
xffffffff().srl32( 7);} - __forceinline static GSVector8i x03ffffff() {return xffffffff().srl32( 6);} - __forceinline static GSVector8i x07ffffff() {return xffffffff().srl32( 5);} - __forceinline static GSVector8i x0fffffff() {return xffffffff().srl32( 4);} - __forceinline static GSVector8i x1fffffff() {return xffffffff().srl32( 3);} - __forceinline static GSVector8i x3fffffff() {return xffffffff().srl32( 2);} - __forceinline static GSVector8i x7fffffff() {return xffffffff().srl32( 1);} - - __forceinline static GSVector8i x80000000() {return xffffffff().sll32(31);} - __forceinline static GSVector8i xc0000000() {return xffffffff().sll32(30);} - __forceinline static GSVector8i xe0000000() {return xffffffff().sll32(29);} - __forceinline static GSVector8i xf0000000() {return xffffffff().sll32(28);} - __forceinline static GSVector8i xf8000000() {return xffffffff().sll32(27);} - __forceinline static GSVector8i xfc000000() {return xffffffff().sll32(26);} - __forceinline static GSVector8i xfe000000() {return xffffffff().sll32(25);} - __forceinline static GSVector8i xff000000() {return xffffffff().sll32(24);} - __forceinline static GSVector8i xff800000() {return xffffffff().sll32(23);} - __forceinline static GSVector8i xffc00000() {return xffffffff().sll32(22);} - __forceinline static GSVector8i xffe00000() {return xffffffff().sll32(21);} - __forceinline static GSVector8i xfff00000() {return xffffffff().sll32(20);} - __forceinline static GSVector8i xfff80000() {return xffffffff().sll32(19);} - __forceinline static GSVector8i xfffc0000() {return xffffffff().sll32(18);} - __forceinline static GSVector8i xfffe0000() {return xffffffff().sll32(17);} - __forceinline static GSVector8i xffff0000() {return xffffffff().sll32(16);} - __forceinline static GSVector8i xffff8000() {return xffffffff().sll32(15);} - __forceinline static GSVector8i xffffc000() {return xffffffff().sll32(14);} - __forceinline static GSVector8i xffffe000() {return xffffffff().sll32(13);} - __forceinline static 
GSVector8i xfffff000() {return xffffffff().sll32(12);} - __forceinline static GSVector8i xfffff800() {return xffffffff().sll32(11);} - __forceinline static GSVector8i xfffffc00() {return xffffffff().sll32(10);} - __forceinline static GSVector8i xfffffe00() {return xffffffff().sll32( 9);} - __forceinline static GSVector8i xffffff00() {return xffffffff().sll32( 8);} - __forceinline static GSVector8i xffffff80() {return xffffffff().sll32( 7);} - __forceinline static GSVector8i xffffffc0() {return xffffffff().sll32( 6);} - __forceinline static GSVector8i xffffffe0() {return xffffffff().sll32( 5);} - __forceinline static GSVector8i xfffffff0() {return xffffffff().sll32( 4);} - __forceinline static GSVector8i xfffffff8() {return xffffffff().sll32( 3);} - __forceinline static GSVector8i xfffffffc() {return xffffffff().sll32( 2);} - __forceinline static GSVector8i xfffffffe() {return xffffffff().sll32( 1);} - - __forceinline static GSVector8i x0001() {return xffffffff().srl16(15);} - __forceinline static GSVector8i x0003() {return xffffffff().srl16(14);} - __forceinline static GSVector8i x0007() {return xffffffff().srl16(13);} - __forceinline static GSVector8i x000f() {return xffffffff().srl16(12);} - __forceinline static GSVector8i x001f() {return xffffffff().srl16(11);} - __forceinline static GSVector8i x003f() {return xffffffff().srl16(10);} - __forceinline static GSVector8i x007f() {return xffffffff().srl16( 9);} - __forceinline static GSVector8i x00ff() {return xffffffff().srl16( 8);} - __forceinline static GSVector8i x01ff() {return xffffffff().srl16( 7);} - __forceinline static GSVector8i x03ff() {return xffffffff().srl16( 6);} - __forceinline static GSVector8i x07ff() {return xffffffff().srl16( 5);} - __forceinline static GSVector8i x0fff() {return xffffffff().srl16( 4);} - __forceinline static GSVector8i x1fff() {return xffffffff().srl16( 3);} - __forceinline static GSVector8i x3fff() {return xffffffff().srl16( 2);} - __forceinline static GSVector8i x7fff() 
{return xffffffff().srl16( 1);} - - __forceinline static GSVector8i x8000() {return xffffffff().sll16(15);} - __forceinline static GSVector8i xc000() {return xffffffff().sll16(14);} - __forceinline static GSVector8i xe000() {return xffffffff().sll16(13);} - __forceinline static GSVector8i xf000() {return xffffffff().sll16(12);} - __forceinline static GSVector8i xf800() {return xffffffff().sll16(11);} - __forceinline static GSVector8i xfc00() {return xffffffff().sll16(10);} - __forceinline static GSVector8i xfe00() {return xffffffff().sll16( 9);} - __forceinline static GSVector8i xff00() {return xffffffff().sll16( 8);} - __forceinline static GSVector8i xff80() {return xffffffff().sll16( 7);} - __forceinline static GSVector8i xffc0() {return xffffffff().sll16( 6);} - __forceinline static GSVector8i xffe0() {return xffffffff().sll16( 5);} - __forceinline static GSVector8i xfff0() {return xffffffff().sll16( 4);} - __forceinline static GSVector8i xfff8() {return xffffffff().sll16( 3);} - __forceinline static GSVector8i xfffc() {return xffffffff().sll16( 2);} - __forceinline static GSVector8i xfffe() {return xffffffff().sll16( 1);} - - __forceinline static GSVector8i xffffffff(const GSVector8i& v) {return v == v;} - - __forceinline static GSVector8i x00000001(const GSVector8i& v) {return xffffffff(v).srl32(31);} - __forceinline static GSVector8i x00000003(const GSVector8i& v) {return xffffffff(v).srl32(30);} - __forceinline static GSVector8i x00000007(const GSVector8i& v) {return xffffffff(v).srl32(29);} - __forceinline static GSVector8i x0000000f(const GSVector8i& v) {return xffffffff(v).srl32(28);} - __forceinline static GSVector8i x0000001f(const GSVector8i& v) {return xffffffff(v).srl32(27);} - __forceinline static GSVector8i x0000003f(const GSVector8i& v) {return xffffffff(v).srl32(26);} - __forceinline static GSVector8i x0000007f(const GSVector8i& v) {return xffffffff(v).srl32(25);} - __forceinline static GSVector8i x000000ff(const GSVector8i& v) {return 
xffffffff(v).srl32(24);} - __forceinline static GSVector8i x000001ff(const GSVector8i& v) {return xffffffff(v).srl32(23);} - __forceinline static GSVector8i x000003ff(const GSVector8i& v) {return xffffffff(v).srl32(22);} - __forceinline static GSVector8i x000007ff(const GSVector8i& v) {return xffffffff(v).srl32(21);} - __forceinline static GSVector8i x00000fff(const GSVector8i& v) {return xffffffff(v).srl32(20);} - __forceinline static GSVector8i x00001fff(const GSVector8i& v) {return xffffffff(v).srl32(19);} - __forceinline static GSVector8i x00003fff(const GSVector8i& v) {return xffffffff(v).srl32(18);} - __forceinline static GSVector8i x00007fff(const GSVector8i& v) {return xffffffff(v).srl32(17);} - __forceinline static GSVector8i x0000ffff(const GSVector8i& v) {return xffffffff(v).srl32(16);} - __forceinline static GSVector8i x0001ffff(const GSVector8i& v) {return xffffffff(v).srl32(15);} - __forceinline static GSVector8i x0003ffff(const GSVector8i& v) {return xffffffff(v).srl32(14);} - __forceinline static GSVector8i x0007ffff(const GSVector8i& v) {return xffffffff(v).srl32(13);} - __forceinline static GSVector8i x000fffff(const GSVector8i& v) {return xffffffff(v).srl32(12);} - __forceinline static GSVector8i x001fffff(const GSVector8i& v) {return xffffffff(v).srl32(11);} - __forceinline static GSVector8i x003fffff(const GSVector8i& v) {return xffffffff(v).srl32(10);} - __forceinline static GSVector8i x007fffff(const GSVector8i& v) {return xffffffff(v).srl32( 9);} - __forceinline static GSVector8i x00ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 8);} - __forceinline static GSVector8i x01ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 7);} - __forceinline static GSVector8i x03ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 6);} - __forceinline static GSVector8i x07ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 5);} - __forceinline static GSVector8i x0fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 4);} - 
__forceinline static GSVector8i x1fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 3);} - __forceinline static GSVector8i x3fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 2);} - __forceinline static GSVector8i x7fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 1);} - - __forceinline static GSVector8i x80000000(const GSVector8i& v) {return xffffffff(v).sll32(31);} - __forceinline static GSVector8i xc0000000(const GSVector8i& v) {return xffffffff(v).sll32(30);} - __forceinline static GSVector8i xe0000000(const GSVector8i& v) {return xffffffff(v).sll32(29);} - __forceinline static GSVector8i xf0000000(const GSVector8i& v) {return xffffffff(v).sll32(28);} - __forceinline static GSVector8i xf8000000(const GSVector8i& v) {return xffffffff(v).sll32(27);} - __forceinline static GSVector8i xfc000000(const GSVector8i& v) {return xffffffff(v).sll32(26);} - __forceinline static GSVector8i xfe000000(const GSVector8i& v) {return xffffffff(v).sll32(25);} - __forceinline static GSVector8i xff000000(const GSVector8i& v) {return xffffffff(v).sll32(24);} - __forceinline static GSVector8i xff800000(const GSVector8i& v) {return xffffffff(v).sll32(23);} - __forceinline static GSVector8i xffc00000(const GSVector8i& v) {return xffffffff(v).sll32(22);} - __forceinline static GSVector8i xffe00000(const GSVector8i& v) {return xffffffff(v).sll32(21);} - __forceinline static GSVector8i xfff00000(const GSVector8i& v) {return xffffffff(v).sll32(20);} - __forceinline static GSVector8i xfff80000(const GSVector8i& v) {return xffffffff(v).sll32(19);} - __forceinline static GSVector8i xfffc0000(const GSVector8i& v) {return xffffffff(v).sll32(18);} - __forceinline static GSVector8i xfffe0000(const GSVector8i& v) {return xffffffff(v).sll32(17);} - __forceinline static GSVector8i xffff0000(const GSVector8i& v) {return xffffffff(v).sll32(16);} - __forceinline static GSVector8i xffff8000(const GSVector8i& v) {return xffffffff(v).sll32(15);} - __forceinline static GSVector8i 
xffffc000(const GSVector8i& v) {return xffffffff(v).sll32(14);} - __forceinline static GSVector8i xffffe000(const GSVector8i& v) {return xffffffff(v).sll32(13);} - __forceinline static GSVector8i xfffff000(const GSVector8i& v) {return xffffffff(v).sll32(12);} - __forceinline static GSVector8i xfffff800(const GSVector8i& v) {return xffffffff(v).sll32(11);} - __forceinline static GSVector8i xfffffc00(const GSVector8i& v) {return xffffffff(v).sll32(10);} - __forceinline static GSVector8i xfffffe00(const GSVector8i& v) {return xffffffff(v).sll32( 9);} - __forceinline static GSVector8i xffffff00(const GSVector8i& v) {return xffffffff(v).sll32( 8);} - __forceinline static GSVector8i xffffff80(const GSVector8i& v) {return xffffffff(v).sll32( 7);} - __forceinline static GSVector8i xffffffc0(const GSVector8i& v) {return xffffffff(v).sll32( 6);} - __forceinline static GSVector8i xffffffe0(const GSVector8i& v) {return xffffffff(v).sll32( 5);} - __forceinline static GSVector8i xfffffff0(const GSVector8i& v) {return xffffffff(v).sll32( 4);} - __forceinline static GSVector8i xfffffff8(const GSVector8i& v) {return xffffffff(v).sll32( 3);} - __forceinline static GSVector8i xfffffffc(const GSVector8i& v) {return xffffffff(v).sll32( 2);} - __forceinline static GSVector8i xfffffffe(const GSVector8i& v) {return xffffffff(v).sll32( 1);} - - __forceinline static GSVector8i x0001(const GSVector8i& v) {return xffffffff(v).srl16(15);} - __forceinline static GSVector8i x0003(const GSVector8i& v) {return xffffffff(v).srl16(14);} - __forceinline static GSVector8i x0007(const GSVector8i& v) {return xffffffff(v).srl16(13);} - __forceinline static GSVector8i x000f(const GSVector8i& v) {return xffffffff(v).srl16(12);} - __forceinline static GSVector8i x001f(const GSVector8i& v) {return xffffffff(v).srl16(11);} - __forceinline static GSVector8i x003f(const GSVector8i& v) {return xffffffff(v).srl16(10);} - __forceinline static GSVector8i x007f(const GSVector8i& v) {return xffffffff(v).srl16( 9);} - 
__forceinline static GSVector8i x00ff(const GSVector8i& v) {return xffffffff(v).srl16( 8);} - __forceinline static GSVector8i x01ff(const GSVector8i& v) {return xffffffff(v).srl16( 7);} - __forceinline static GSVector8i x03ff(const GSVector8i& v) {return xffffffff(v).srl16( 6);} - __forceinline static GSVector8i x07ff(const GSVector8i& v) {return xffffffff(v).srl16( 5);} - __forceinline static GSVector8i x0fff(const GSVector8i& v) {return xffffffff(v).srl16( 4);} - __forceinline static GSVector8i x1fff(const GSVector8i& v) {return xffffffff(v).srl16( 3);} - __forceinline static GSVector8i x3fff(const GSVector8i& v) {return xffffffff(v).srl16( 2);} - __forceinline static GSVector8i x7fff(const GSVector8i& v) {return xffffffff(v).srl16( 1);} - - __forceinline static GSVector8i x8000(const GSVector8i& v) {return xffffffff(v).sll16(15);} - __forceinline static GSVector8i xc000(const GSVector8i& v) {return xffffffff(v).sll16(14);} - __forceinline static GSVector8i xe000(const GSVector8i& v) {return xffffffff(v).sll16(13);} - __forceinline static GSVector8i xf000(const GSVector8i& v) {return xffffffff(v).sll16(12);} - __forceinline static GSVector8i xf800(const GSVector8i& v) {return xffffffff(v).sll16(11);} - __forceinline static GSVector8i xfc00(const GSVector8i& v) {return xffffffff(v).sll16(10);} - __forceinline static GSVector8i xfe00(const GSVector8i& v) {return xffffffff(v).sll16( 9);} - __forceinline static GSVector8i xff00(const GSVector8i& v) {return xffffffff(v).sll16( 8);} - __forceinline static GSVector8i xff80(const GSVector8i& v) {return xffffffff(v).sll16( 7);} - __forceinline static GSVector8i xffc0(const GSVector8i& v) {return xffffffff(v).sll16( 6);} - __forceinline static GSVector8i xffe0(const GSVector8i& v) {return xffffffff(v).sll16( 5);} - __forceinline static GSVector8i xfff0(const GSVector8i& v) {return xffffffff(v).sll16( 4);} - __forceinline static GSVector8i xfff8(const GSVector8i& v) {return xffffffff(v).sll16( 3);} - __forceinline static 
GSVector8i xfffc(const GSVector8i& v) {return xffffffff(v).sll16( 2);} - __forceinline static GSVector8i xfffe(const GSVector8i& v) {return xffffffff(v).sll16( 1);} - - __forceinline static GSVector8i xff(int n) {return m_xff[n];} - __forceinline static GSVector8i x0f(int n) {return m_x0f[n];} -}; - -#endif - -#if _M_SSE >= 0x500 - -__aligned(class, 32) GSVector8 -{ -public: - union - { - struct {float x0, y0, z0, w0, x1, y1, z1, w1;}; - struct {float r0, g0, b0, a0, r1, g1, b1, a1;}; - float v[8]; - float f32[8]; - int8 i8[32]; - int16 i16[16]; - int32 i32[8]; - int64 i64[4]; - uint8 u8[32]; - uint16 u16[16]; - uint32 u32[8]; - uint64 u64[4]; - __m256 m; - __m128 m0, m1; - }; - - static const GSVector8 m_half; - static const GSVector8 m_one; - static const GSVector8 m_x7fffffff; - static const GSVector8 m_x80000000; - static const GSVector8 m_x4b000000; - static const GSVector8 m_x4f800000; - static const GSVector8 m_max; - static const GSVector8 m_min; - - __forceinline GSVector8() - { - } - - __forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) - { - m = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0); - } - - __forceinline GSVector8(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) - { - m = _mm256_cvtepi32_ps(_mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0)); - } - - __forceinline GSVector8(__m128 m0, __m128 m1) - { - #if 0 // _MSC_VER >= 1700 - - this->m = _mm256_permute2f128_ps(_mm256_castps128_ps256(m0), _mm256_castps128_ps256(m1), 0x20); - - #else - - this->m = zero().insert<0>(m0).insert<1>(m1); - - #endif - } - - __forceinline GSVector8(const GSVector8& v) - { - m = v.m; - } - - __forceinline explicit GSVector8(float f) - { - *this = f; - } - - __forceinline explicit GSVector8(int i) - { - #if _M_SSE >= 0x501 - - m = _mm256_cvtepi32_ps(_mm256_broadcastd_epi32(_mm_cvtsi32_si128(i))); - - #else - - GSVector4i v((int)i); - - *this = GSVector4(v); - - #endif - } - - __forceinline explicit 
GSVector8(__m128 m) - { - *this = m; - } - - __forceinline explicit GSVector8(__m256 m) - { - this->m = m; - } - - #if _M_SSE >= 0x501 - - __forceinline explicit GSVector8(const GSVector8i& v); - - __forceinline static GSVector8 cast(const GSVector8i& v); - - #endif - - __forceinline static GSVector8 cast(const GSVector4& v); - __forceinline static GSVector8 cast(const GSVector4i& v); - - __forceinline void operator = (const GSVector8& v) - { - m = v.m; - } - - __forceinline void operator = (float f) - { - #if _M_SSE >= 0x501 - - m = _mm256_broadcastss_ps(_mm_load_ss(&f)); - - #else - - m = _mm256_set1_ps(f); - - #endif - } - - __forceinline void operator = (__m128 m) - { - this->m = _mm256_insertf128_ps(_mm256_castps128_ps256(m), m, 1); - } - - __forceinline void operator = (__m256 m) - { - this->m = m; - } - - __forceinline operator __m256() const - { - return m; - } - - __forceinline GSVector8 abs() const - { - #if _M_SSE >= 0x501 - - return *this & cast(GSVector8i::x7fffffff()); - - #else - - return *this & m_x7fffffff; - - #endif - } - - __forceinline GSVector8 neg() const - { - #if _M_SSE >= 0x501 - - return *this ^ cast(GSVector8i::x80000000()); - - #else - - return *this ^ m_x80000000; - - #endif - } - - __forceinline GSVector8 rcp() const - { - return GSVector8(_mm256_rcp_ps(m)); - } - - __forceinline GSVector8 rcpnr() const - { - GSVector8 v = rcp(); - - return (v + v) - (v * v) * *this; - } - - template __forceinline GSVector8 round() const - { - return GSVector8(_mm256_round_ps(m, mode)); - } - - __forceinline GSVector8 floor() const - { - return round(); - } - - __forceinline GSVector8 ceil() const - { - return round(); - } - - #if _M_SSE >= 0x501 - - #define LOG8_POLY0(x, c0) GSVector8(c0) - #define LOG8_POLY1(x, c0, c1) (LOG8_POLY0(x, c1).madd(x, GSVector8(c0))) - #define LOG8_POLY2(x, c0, c1, c2) (LOG8_POLY1(x, c1, c2).madd(x, GSVector8(c0))) - #define LOG8_POLY3(x, c0, c1, c2, c3) (LOG8_POLY2(x, c1, c2, c3).madd(x, GSVector8(c0))) - #define 
LOG8_POLY4(x, c0, c1, c2, c3, c4) (LOG8_POLY3(x, c1, c2, c3, c4).madd(x, GSVector8(c0))) - #define LOG8_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG8_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector8(c0))) - - __forceinline GSVector8 log2(int precision = 5) const - { - // NOTE: see GSVector4::log2 - - GSVector8 one = m_one; - - GSVector8i i = GSVector8i::cast(*this); - - GSVector8 e = GSVector8(((i << 1) >> 24) - GSVector8i::x0000007f()); - GSVector8 m = GSVector8::cast((i << 9) >> 9) | one; - - GSVector8 p; - - switch(precision) - { - case 3: - p = LOG8_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); - break; - case 4: - p = LOG8_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); - break; - default: - case 5: - p = LOG8_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); - break; - case 6: - p = LOG8_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); - break; - } - - // This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 - - p = p * (m - one); - - return p + e; - } - - #endif - - __forceinline GSVector8 madd(const GSVector8& a, const GSVector8& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector8(_mm256_fmadd_ps(m, a, b)); - - #else - - return *this * a + b; - - #endif - } - - __forceinline GSVector8 msub(const GSVector8& a, const GSVector8& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector8(_mm256_fmsub_ps(m, a, b)); - - #else - - return *this * a - b; - - #endif - } - - __forceinline GSVector8 nmadd(const GSVector8& a, const GSVector8& b) const - { - #if 0//_M_SSE >= 0x501 - - return GSVector8(_mm256_fnmadd_ps(m, a, b)); - - #else - - return b - *this * a; - - #endif - } - - __forceinline GSVector8 nmsub(const GSVector8& a, const GSVector8& b) const - { - #if 0//_M_SSE >= 0x501 - - return 
GSVector8(_mm256_fnmsub_ps(m, a, b)); - - #else - - return -b - *this * a; - - #endif - } - - __forceinline GSVector8 addm(const GSVector8& a, const GSVector8& b) const - { - return a.madd(b, *this); // *this + a * b - } - - __forceinline GSVector8 subm(const GSVector8& a, const GSVector8& b) const - { - return a.nmadd(b, *this); // *this - a * b - } - - __forceinline GSVector8 hadd() const - { - return GSVector8(_mm256_hadd_ps(m, m)); - } - - __forceinline GSVector8 hadd(const GSVector8& v) const - { - return GSVector8(_mm256_hadd_ps(m, v.m)); - } - - __forceinline GSVector8 hsub() const - { - return GSVector8(_mm256_hsub_ps(m, m)); - } - - __forceinline GSVector8 hsub(const GSVector8& v) const - { - return GSVector8(_mm256_hsub_ps(m, v.m)); - } - - template __forceinline GSVector8 dp(const GSVector8& v) const - { - return GSVector8(_mm256_dp_ps(m, v.m, i)); - } - - __forceinline GSVector8 sat(const GSVector8& a, const GSVector8& b) const - { - return GSVector8(_mm256_min_ps(_mm256_max_ps(m, a), b)); - } - - __forceinline GSVector8 sat(const GSVector8& a) const - { - return GSVector8(_mm256_min_ps(_mm256_max_ps(m, a.xyxy()), a.zwzw())); - } - - __forceinline GSVector8 sat(const float scale = 255) const - { - return sat(zero(), GSVector8(scale)); - } - - __forceinline GSVector8 clamp(const float scale = 255) const - { - return min(GSVector8(scale)); - } - - __forceinline GSVector8 min(const GSVector8& a) const - { - return GSVector8(_mm256_min_ps(m, a)); - } - - __forceinline GSVector8 max(const GSVector8& a) const - { - return GSVector8(_mm256_max_ps(m, a)); - } - - template __forceinline GSVector8 blend32(const GSVector8& a) const - { - return GSVector8(_mm256_blend_ps(m, a, mask)); - } - - __forceinline GSVector8 blend32(const GSVector8& a, const GSVector8& mask) const - { - return GSVector8(_mm256_blendv_ps(m, a, mask)); - } - - __forceinline GSVector8 upl(const GSVector8& a) const - { - return GSVector8(_mm256_unpacklo_ps(m, a)); - } - - __forceinline 
GSVector8 uph(const GSVector8& a) const - { - return GSVector8(_mm256_unpackhi_ps(m, a)); - } - - __forceinline GSVector8 upl64(const GSVector8& a) const - { - return GSVector8(_mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(m), _mm256_castps_pd(a)))); - } - - __forceinline GSVector8 uph64(const GSVector8& a) const - { - return GSVector8(_mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(m), _mm256_castps_pd(a)))); - } - - __forceinline GSVector8 l2h() const - { - return xyxy(); - } - - __forceinline GSVector8 h2l() const - { - return zwzw(); - } - - __forceinline GSVector8 andnot(const GSVector8& v) const - { - return GSVector8(_mm256_andnot_ps(v.m, m)); - } - - __forceinline int mask() const - { - return _mm256_movemask_ps(m); - } - - __forceinline bool alltrue() const - { - return mask() == 0xff; - } - - __forceinline bool allfalse() const - { - return _mm256_testz_ps(m, m) != 0; - } - - __forceinline GSVector8 replace_nan(const GSVector8& v) const - { - return v.blend32(*this, *this == *this); - } - - template __forceinline GSVector8 insert32(const GSVector8& v) const - { - // TODO: use blendps when src == dst - - ASSERT(src < 4 && dst < 4); // not cross lane like extract32() - - switch(dst) - { - case 0: - switch(src) - { - case 0: return yyxx(v).zxzw(*this); - case 1: return yyyy(v).zxzw(*this); - case 2: return yyzz(v).zxzw(*this); - case 3: return yyww(v).zxzw(*this); - default: __assume(0); - } - break; - case 1: - switch(src) - { - case 0: return xxxx(v).xzzw(*this); - case 1: return xxyy(v).xzzw(*this); - case 2: return xxzz(v).xzzw(*this); - case 3: return xxww(v).xzzw(*this); - default: __assume(0); - } - break; - case 2: - switch(src) - { - case 0: return xyzx(wwxx(v)); - case 1: return xyzx(wwyy(v)); - case 2: return xyzx(wwzz(v)); - case 3: return xyzx(wwww(v)); - default: __assume(0); - } - break; - case 3: - switch(src) - { - case 0: return xyxz(zzxx(v)); - case 1: return xyxz(zzyy(v)); - case 2: return xyxz(zzzz(v)); - case 3: return 
xyxz(zzww(v)); - default: __assume(0); - } - break; - default: - __assume(0); - } - - return *this; - } - - template __forceinline int extract32() const - { - ASSERT(i < 8); - - return extract().template extract32(); - } - - template __forceinline GSVector8 insert(__m128 m) const - { - ASSERT(i < 2); - - return GSVector8(_mm256_insertf128_ps(this->m, m, i)); - } - - template __forceinline GSVector4 extract() const - { - ASSERT(i < 2); - - if(i == 0) return GSVector4(_mm256_castps256_ps128(m)); - - return GSVector4(_mm256_extractf128_ps(m, i)); - } - - __forceinline static GSVector8 zero() - { - return GSVector8(_mm256_setzero_ps()); - } - - __forceinline static GSVector8 xffffffff() - { - return zero() == zero(); - } - - // TODO - - __forceinline static GSVector8 loadl(const void* p) - { - return GSVector8(_mm256_castps128_ps256(_mm_load_ps((float*)p))); - } - - __forceinline static GSVector8 loadh(const void* p) - { - return zero().insert<1>(_mm_load_ps((float*)p)); - } - - __forceinline static GSVector8 loadh(const void* p, const GSVector8& v) - { - return GSVector8(_mm256_insertf128_ps(v, _mm_load_ps((float*)p), 1)); - } - - __forceinline static GSVector8 load(const void* pl, const void* ph) - { - return loadh(ph, loadl(pl)); - } - - template __forceinline static GSVector8 load(const void* p) - { - return GSVector8(aligned ? 
_mm256_load_ps((const float*)p) : _mm256_loadu_ps((const float*)p)); - } - - // TODO - - __forceinline static void storel(void* p, const GSVector8& v) - { - _mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 0)); - } - - __forceinline static void storeh(void* p, const GSVector8& v) - { - _mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 1)); - } - - template __forceinline static void store(void* p, const GSVector8& v) - { - if(aligned) _mm256_store_ps((float*)p, v.m); - else _mm256_storeu_ps((float*)p, v.m); - } - - // - - __forceinline static void zeroupper() - { - _mm256_zeroupper(); - } - - __forceinline static void zeroall() - { - _mm256_zeroall(); - } - - // - - __forceinline GSVector8 operator - () const - { - return neg(); - } - - __forceinline void operator += (const GSVector8& v) - { - m = _mm256_add_ps(m, v); - } - - __forceinline void operator -= (const GSVector8& v) - { - m = _mm256_sub_ps(m, v); - } - - __forceinline void operator *= (const GSVector8& v) - { - m = _mm256_mul_ps(m, v); - } - - __forceinline void operator /= (const GSVector8& v) - { - m = _mm256_div_ps(m, v); - } - - __forceinline void operator += (float f) - { - *this += GSVector8(f); - } - - __forceinline void operator -= (float f) - { - *this -= GSVector8(f); - } - - __forceinline void operator *= (float f) - { - *this *= GSVector8(f); - } - - __forceinline void operator /= (float f) - { - *this /= GSVector8(f); - } - - __forceinline void operator &= (const GSVector8& v) - { - m = _mm256_and_ps(m, v); - } - - __forceinline void operator |= (const GSVector8& v) - { - m = _mm256_or_ps(m, v); - } - - __forceinline void operator ^= (const GSVector8& v) - { - m = _mm256_xor_ps(m, v); - } - - __forceinline friend GSVector8 operator + (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_add_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator - (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_sub_ps(v1, v2)); - } - - __forceinline friend 
GSVector8 operator * (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_mul_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator / (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_div_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator + (const GSVector8& v, float f) - { - return v + GSVector8(f); - } - - __forceinline friend GSVector8 operator - (const GSVector8& v, float f) - { - return v - GSVector8(f); - } - - __forceinline friend GSVector8 operator * (const GSVector8& v, float f) - { - return v * GSVector8(f); - } - - __forceinline friend GSVector8 operator / (const GSVector8& v, float f) - { - return v / GSVector8(f); - } - - __forceinline friend GSVector8 operator & (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_and_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator | (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_or_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator ^ (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_xor_ps(v1, v2)); - } - - __forceinline friend GSVector8 operator == (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_EQ_OQ)); - } - - __forceinline friend GSVector8 operator != (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_NEQ_OQ)); - } - - __forceinline friend GSVector8 operator > (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_GT_OQ)); - } - - __forceinline friend GSVector8 operator < (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LT_OQ)); - } - - __forceinline friend GSVector8 operator >= (const GSVector8& v1, const GSVector8& v2) - { - return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_GE_OQ)); - } - - __forceinline friend GSVector8 operator <= (const GSVector8& v1, const GSVector8& v2) - { - return 
GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LE_OQ)); - } - - // x = v[31:0] / v[159:128] - // y = v[63:32] / v[191:160] - // z = v[95:64] / v[223:192] - // w = v[127:96] / v[255:224] - - - #define VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8 xs##ys##zs##ws(const GSVector8& v) const {return GSVector8(_mm256_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} - - // vs2012u3 cannot reuse the result of equivalent shuffles when it is done with _mm256_permute_ps (write v.xxxx() twice, and it will do it twice), but with _mm256_shuffle_ps it can. - //__forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_permute_ps(m, _MM_SHUFFLE(wn, zn, yn, xn)));} - - #define VECTOR8_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ - VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ - VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \ - VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \ - VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \ - - #define VECTOR8_SHUFFLE_2(xs, xn, ys, yn) \ - VECTOR8_SHUFFLE_3(xs, xn, ys, yn, x, 0) \ - VECTOR8_SHUFFLE_3(xs, xn, ys, yn, y, 1) \ - VECTOR8_SHUFFLE_3(xs, xn, ys, yn, z, 2) \ - VECTOR8_SHUFFLE_3(xs, xn, ys, yn, w, 3) \ - - #define VECTOR8_SHUFFLE_1(xs, xn) \ - VECTOR8_SHUFFLE_2(xs, xn, x, 0) \ - VECTOR8_SHUFFLE_2(xs, xn, y, 1) \ - VECTOR8_SHUFFLE_2(xs, xn, z, 2) \ - VECTOR8_SHUFFLE_2(xs, xn, w, 3) \ - - VECTOR8_SHUFFLE_1(x, 0) - VECTOR8_SHUFFLE_1(y, 1) - VECTOR8_SHUFFLE_1(z, 2) - VECTOR8_SHUFFLE_1(w, 3) - - // a = v0[127:0] - // b = v0[255:128] - // c = v1[127:0] - // d = v1[255:128] - // _ = 0 - - #define VECTOR8_PERMUTE128_2(as, an, bs, bn) \ - __forceinline GSVector8 as##bs() const {return GSVector8(_mm256_permute2f128_ps(m, m, an | (bn << 4)));} \ - __forceinline GSVector8 as##bs(const GSVector8& v) const {return GSVector8(_mm256_permute2f128_ps(m, v.m, an | (bn << 4)));} \ - - #define 
VECTOR8_PERMUTE128_1(as, an) \ - VECTOR8_PERMUTE128_2(as, an, a, 0) \ - VECTOR8_PERMUTE128_2(as, an, b, 1) \ - VECTOR8_PERMUTE128_2(as, an, c, 2) \ - VECTOR8_PERMUTE128_2(as, an, d, 3) \ - VECTOR8_PERMUTE128_2(as, an, _, 8) \ - - VECTOR8_PERMUTE128_1(a, 0) - VECTOR8_PERMUTE128_1(b, 1) - VECTOR8_PERMUTE128_1(c, 2) - VECTOR8_PERMUTE128_1(d, 3) - VECTOR8_PERMUTE128_1(_, 8) - - #if _M_SSE >= 0x501 - - // a = v[63:0] - // b = v[127:64] - // c = v[191:128] - // d = v[255:192] - - #define VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \ - __forceinline GSVector8 as##bs##cs##ds() const {return GSVector8(_mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(m), _MM_SHUFFLE(dn, cn, bn, an))));} \ - - #define VECTOR8_PERMUTE64_3(as, an, bs, bn, cs, cn) \ - VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \ - VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, b, 1) \ - VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, c, 2) \ - VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, d, 3) \ - - #define VECTOR8_PERMUTE64_2(as, an, bs, bn) \ - VECTOR8_PERMUTE64_3(as, an, bs, bn, a, 0) \ - VECTOR8_PERMUTE64_3(as, an, bs, bn, b, 1) \ - VECTOR8_PERMUTE64_3(as, an, bs, bn, c, 2) \ - VECTOR8_PERMUTE64_3(as, an, bs, bn, d, 3) \ - - #define VECTOR8_PERMUTE64_1(as, an) \ - VECTOR8_PERMUTE64_2(as, an, a, 0) \ - VECTOR8_PERMUTE64_2(as, an, b, 1) \ - VECTOR8_PERMUTE64_2(as, an, c, 2) \ - VECTOR8_PERMUTE64_2(as, an, d, 3) \ - - VECTOR8_PERMUTE64_1(a, 0) - VECTOR8_PERMUTE64_1(b, 1) - VECTOR8_PERMUTE64_1(c, 2) - VECTOR8_PERMUTE64_1(d, 3) - - __forceinline GSVector8 permute32(const GSVector8i& mask) const - { - return GSVector8(_mm256_permutevar8x32_ps(m, mask)); - } - - __forceinline GSVector8 broadcast32() const - { - return GSVector8(_mm256_broadcastss_ps(_mm256_castps256_ps128(m))); - } - - __forceinline static GSVector8 broadcast32(const GSVector4& v) - { - return GSVector8(_mm256_broadcastss_ps(v.m)); - } - - __forceinline static GSVector8 broadcast32(const void* f) - { - return 
GSVector8(_mm256_broadcastss_ps(_mm_load_ss((const float*)f))); - } - - // TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element - - #endif -}; - -#endif - -// conversion - -__forceinline GSVector4i::GSVector4i(const GSVector4& v, bool truncate) -{ - m = truncate ? _mm_cvttps_epi32(v) : _mm_cvtps_epi32(v); -} - -__forceinline GSVector4::GSVector4(const GSVector4i& v) -{ - m = _mm_cvtepi32_ps(v); -} - -#if _M_SSE >= 0x501 - -__forceinline GSVector8i::GSVector8i(const GSVector8& v, bool truncate) -{ - m = truncate ? _mm256_cvttps_epi32(v) : _mm256_cvtps_epi32(v); -} - -__forceinline GSVector8::GSVector8(const GSVector8i& v) -{ - m = _mm256_cvtepi32_ps(v); -} - -#endif - -// casting - -__forceinline GSVector4i GSVector4i::cast(const GSVector4& v) -{ - return GSVector4i(_mm_castps_si128(v.m)); -} - -__forceinline GSVector4 GSVector4::cast(const GSVector4i& v) -{ - return GSVector4(_mm_castsi128_ps(v.m)); -} - -#if _M_SSE >= 0x500 - -__forceinline GSVector4i GSVector4i::cast(const GSVector8& v) -{ - return GSVector4i(_mm_castps_si128(_mm256_castps256_ps128(v))); -} - -__forceinline GSVector4 GSVector4::cast(const GSVector8& v) -{ - return GSVector4(_mm256_castps256_ps128(v)); -} - -__forceinline GSVector8 GSVector8::cast(const GSVector4i& v) -{ - return GSVector8(_mm256_castps128_ps256(_mm_castsi128_ps(v.m))); -} - -__forceinline GSVector8 GSVector8::cast(const GSVector4& v) -{ - return GSVector8(_mm256_castps128_ps256(v.m)); -} - -#endif - -#if _M_SSE >= 0x501 - -__forceinline GSVector4i GSVector4i::cast(const GSVector8i& v) -{ - return GSVector4i(_mm256_castsi256_si128(v)); -} - -__forceinline GSVector4 GSVector4::cast(const GSVector8i& v) -{ - return GSVector4(_mm_castsi128_ps(_mm256_castsi256_si128(v))); -} - -__forceinline GSVector8i GSVector8i::cast(const GSVector4i& v) -{ - return GSVector8i(_mm256_castsi128_si256(v.m)); -} - -__forceinline GSVector8i GSVector8i::cast(const GSVector4& v) -{ - return GSVector8i(_mm256_castsi128_si256(_mm_castps_si128(v.m))); -} - 
-__forceinline GSVector8i GSVector8i::cast(const GSVector8& v) -{ - return GSVector8i(_mm256_castps_si256(v.m)); -} - -__forceinline GSVector8 GSVector8::cast(const GSVector8i& v) -{ - return GSVector8(_mm256_castsi256_ps(v.m)); -} - -#endif - -#pragma pack(pop) diff --git a/plugins/GSdx_legacy/GSVertex.h b/plugins/GSdx_legacy/GSVertex.h deleted file mode 100644 index 198fbb93d0..0000000000 --- a/plugins/GSdx_legacy/GSVertex.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSVector.h" -#include "GSVertexHW.h" -#include "GSVertexSW.h" - -#pragma pack(push, 1) - -__aligned(struct, 32) GSVertex -{ - union - { - struct - { - GIFRegST ST; // S:0, T:4 - GIFRegRGBAQ RGBAQ; // RGBA:8, Q:12 - GIFRegXYZ XYZ; // XY:16, Z:20 - union {uint32 UV; struct {uint16 U, V;};}; // UV:24 - uint32 FOG; // FOG:28 - }; - - __m128i m[2]; - }; - - void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} -}; - -struct GSVertexP -{ - GSVector4 p; -}; - -__aligned(struct, 32) GSVertexPT1 -{ - GSVector4 p; - GSVector2 t; -}; - -struct GSVertexPT2 -{ - GSVector4 p; - GSVector2 t[2]; -}; - -#pragma pack(pop) diff --git a/plugins/GSdx_legacy/GSVertexArrayOGL.h b/plugins/GSdx_legacy/GSVertexArrayOGL.h deleted file mode 100644 index 5c6e31833f..0000000000 --- a/plugins/GSdx_legacy/GSVertexArrayOGL.h +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (C) 2011-2011 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "config.h" - -#ifdef ENABLE_OGL_DEBUG_MEM_BW -extern uint64 g_vertex_upload_byte; -#endif - -struct GSInputLayoutOGL { - GLint size; - GLenum type; - GLboolean normalize; - GLsizei stride; - const GLvoid* offset; -}; - -template -class GSBufferOGL { - size_t m_start; - size_t m_count; - size_t m_limit; - const GLenum m_target; - GLuint m_buffer_name; - uint8* m_buffer_ptr; - const bool m_buffer_storage; - GLsync m_fence[5]; - - public: - GSBufferOGL(GLenum target) - : m_start(0) - , m_count(0) - , m_limit(0) - , m_target(target) - , m_buffer_storage(GLLoader::found_GL_ARB_buffer_storage) - { - glGenBuffers(1, &m_buffer_name); - // Opengl works best with 1-4MB buffer. - // Warning m_limit is the number of object (not the size in Bytes) - m_limit = 8 * 1024 * 1024 / STRIDE; - - for (size_t i = 0; i < 5; i++) { - m_fence[i] = 0; - } - - if (m_buffer_storage) { - // TODO: if we do manually the synchronization, I'm not sure size is important. It worths to investigate it. 
- // => bigger buffer => less sync - bind(); - // coherency will be done by flushing - const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; - const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; - - glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags ); - m_buffer_ptr = (uint8*) glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags); - if (!m_buffer_ptr) { - fprintf(stderr, "Failed to map buffer\n"); - throw GSDXError(); - } - } else { - m_buffer_ptr = NULL; - } - } - - ~GSBufferOGL() { - if (m_buffer_storage) { - for (size_t i = 0; i < 5; i++) { - glDeleteSync(m_fence[i]); - } - // Don't know if we must do it - bind(); - glUnmapBuffer(m_target); - } - glDeleteBuffers(1, &m_buffer_name); - } - - void allocate() { allocate(m_limit); } - - void allocate(size_t new_limit) - { - if (!m_buffer_storage) { - m_start = 0; - m_limit = new_limit; - glBufferData(m_target, m_limit * STRIDE, NULL, GL_STREAM_DRAW); - } - } - - void bind() - { - glBindBuffer(m_target, m_buffer_name); - } - - void subdata_upload(const void* src) - { - // Current GPU buffer is really too small need to allocate a new one - if (m_count > m_limit) { - //fprintf(stderr, "Allocate a new buffer\n %d", STRIDE); - allocate(std::max(m_count * 3 / 2, m_limit)); - - } else if (m_count > (m_limit - m_start) ) { - //fprintf(stderr, "Orphan the buffer %d\n", STRIDE); - - // Not enough left free room. 
Just go back at the beginning - m_start = 0; - // Orphan the buffer to avoid synchronization - allocate(m_limit); - } - - glBufferSubData(m_target, STRIDE * m_start, STRIDE * m_count, src); - } - - void map_upload(const void* src) - { - ASSERT(m_count < m_limit); - - size_t offset = m_start * STRIDE; - size_t length = m_count * STRIDE; - - if (m_count > (m_limit - m_start) ) { - size_t current_chunk = offset >> 21; -#ifdef ENABLE_OGL_DEBUG_FENCE - fprintf(stderr, "%x: Wrap buffer\n", m_target); - fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, current_chunk); -#endif - ASSERT(current_chunk > 0 && current_chunk < 5); - if (m_fence[current_chunk] == 0) { - m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } - - // Wrap at startup - m_start = 0; - offset = 0; - - // Only check first chunk - if (m_fence[0]) { -#ifdef ENABLE_OGL_DEBUG_FENCE - GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - if (status != GL_ALREADY_SIGNALED) { - fprintf(stderr, "%x: Sync Sync! 
Buffer too small\n", m_target); - } -#else - glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#endif - glDeleteSync(m_fence[0]); - m_fence[0] = 0; - } - } - - // Protect buffer with fences - size_t current_chunk = offset >> 21; - size_t next_chunk = (offset + length) >> 21; - for (size_t c = current_chunk + 1; c <= next_chunk; c++) { -#ifdef ENABLE_OGL_DEBUG_FENCE - fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c-1); -#endif - ASSERT(c > 0 && c < 5); - m_fence[c-1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - if (m_fence[c]) { -#ifdef ENABLE_OGL_DEBUG_FENCE - GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#else - glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#endif - glDeleteSync(m_fence[c]); - m_fence[c] = 0; - -#ifdef ENABLE_OGL_DEBUG_FENCE - if (status != GL_ALREADY_SIGNALED) { - fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); - } -#endif - } - } - - void* dst = m_buffer_ptr + offset; - - memcpy(dst, src, length); - glFlushMappedBufferRange(m_target, offset, length); - } - - void upload(const void* src, uint32 count) - { -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_vertex_upload_byte += count * STRIDE; -#endif - - m_count = count; - - if (m_buffer_storage) { - map_upload(src); - } else { - subdata_upload(src); - } - } - - void EndScene() - { - m_start += m_count; - m_count = 0; - } - - void Draw(GLenum mode) - { - glDrawArrays(mode, m_start, m_count); - } - - void Draw(GLenum mode, int offset, int count) - { - glDrawArrays(mode, m_start + offset, count); - } - - - void Draw(GLenum mode, GLint basevertex) - { - glDrawElementsBaseVertex(mode, m_count, GL_UNSIGNED_INT, (void*)(m_start * STRIDE), basevertex); - } - - void Draw(GLenum mode, GLint basevertex, int offset, int count) - { - glDrawElementsBaseVertex(mode, count, GL_UNSIGNED_INT, (void*)((m_start + offset) * STRIDE), basevertex); - } - - size_t GetStart() { return 
m_start; } - -}; - -class GSVertexBufferStateOGL { - GSBufferOGL *m_vb; - GSBufferOGL *m_ib; - - GLuint m_va; - GLenum m_topology; - - // No copy constructor please - GSVertexBufferStateOGL(const GSVertexBufferStateOGL& ) = delete; - -public: - GSVertexBufferStateOGL(GSInputLayoutOGL* layout, uint32 layout_nbr) : m_vb(NULL), m_ib(NULL), m_topology(0) - { - glGenVertexArrays(1, &m_va); - glBindVertexArray(m_va); - - m_vb = new GSBufferOGL(GL_ARRAY_BUFFER); - m_ib = new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER); - - m_vb->bind(); - m_ib->bind(); - - m_vb->allocate(); - m_ib->allocate(); - set_internal_format(layout, layout_nbr); - } - - void bind() - { - // Note: index array are part of the VA state so it need to be bound only once. - glBindVertexArray(m_va); - if (m_vb) - m_vb->bind(); - } - - void set_internal_format(GSInputLayoutOGL* layout, uint32 layout_nbr) - { - for (uint32 i = 0; i < layout_nbr; i++) { - // Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer - glEnableVertexAttribArray(i); - switch (layout[i].type) { - case GL_UNSIGNED_SHORT: - case GL_UNSIGNED_INT: - if (layout[i].normalize) { - glVertexAttribPointer(i, layout[i].size, layout[i].type, layout[i].normalize, layout[i].stride, layout[i].offset); - } else { - // Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I) - glVertexAttribIPointer(i, layout[i].size, layout[i].type, layout[i].stride, layout[i].offset); - } - break; - default: - glVertexAttribPointer(i, layout[i].size, layout[i].type, layout[i].normalize, layout[i].stride, layout[i].offset); - break; - } - } - } - - void EndScene() - { - m_vb->EndScene(); - m_ib->EndScene(); - } - - void DrawPrimitive() { m_vb->Draw(m_topology); } - - void DrawPrimitive(int offset, int count) { m_vb->Draw(m_topology, offset, count); } - - void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart() ); } - - void DrawIndexedPrimitive(int offset, int count) { 
m_ib->Draw(m_topology, m_vb->GetStart(), offset, count ); } - - void SetTopology(GLenum topology) { m_topology = topology; } - - void UploadVB(const void* vertices, size_t count) { m_vb->upload(vertices, count); } - - void UploadIB(const void* index, size_t count) { - m_ib->upload(index, count); - } - - ~GSVertexBufferStateOGL() - { - glDeleteVertexArrays(1, &m_va); - delete m_vb; - delete m_ib; - } - -}; diff --git a/plugins/GSdx_legacy/GSVertexHW.h b/plugins/GSdx_legacy/GSVertexHW.h deleted file mode 100644 index e0fe308b62..0000000000 --- a/plugins/GSdx_legacy/GSVertexHW.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GS.h" -#include "GSVector.h" - -#pragma pack(push, 1) - -__aligned(struct, 32) GSVertexHW9 -{ - GSVector4 t; - GSVector4 p; - - // t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;}; - // t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;} - - GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;} -}; - -#pragma pack(pop) diff --git a/plugins/GSdx_legacy/GSVertexList.cpp b/plugins/GSdx_legacy/GSVertexList.cpp deleted file mode 100644 index f6477c3ae6..0000000000 --- a/plugins/GSdx_legacy/GSVertexList.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "StdAfx.h" -#include "GSVertexList.h" diff --git a/plugins/GSdx_legacy/GSVertexList.h b/plugins/GSdx_legacy/GSVertexList.h deleted file mode 100644 index bd0b016d4c..0000000000 --- a/plugins/GSdx_legacy/GSVertexList.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -template class GSVertexList -{ - void* m_base; - Vertex* m_v[3]; - int m_count; - -public: - GSVertexList() - : m_count(0) - { - m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 32); - - for(size_t i = 0; i < countof(m_v); i++) - { - m_v[i] = &((Vertex*)m_base)[i]; - } - } - - virtual ~GSVertexList() - { - _aligned_free(m_base); - } - - void RemoveAll() - { - m_count = 0; - } - - __forceinline Vertex& AddTail() - { - ASSERT(m_count < 3); - - return *m_v[m_count++]; - } - - __forceinline void RemoveAt(int pos, int keep) - { - if(keep == 1) - { - Vertex* tmp = m_v[pos + 0]; - m_v[pos + 0] = m_v[pos + 1]; - m_v[pos + 1] = tmp; - } - else if(keep == 2) - { - Vertex* tmp = m_v[pos + 0]; - m_v[pos + 0] = m_v[pos + 1]; - m_v[pos + 1] = m_v[pos + 2]; - m_v[pos + 2] = tmp; - } - - m_count = pos + keep; - } - - __forceinline void GetAt(int i, Vertex& v) - { - v = *m_v[i]; - } - - int GetCount() - { - return m_count; - } -}; diff --git a/plugins/GSdx_legacy/GSVertexSW.cpp b/plugins/GSdx_legacy/GSVertexSW.cpp deleted file mode 100644 index 2b286741f7..0000000000 --- a/plugins/GSdx_legacy/GSVertexSW.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexSW.h" \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSVertexSW.h b/plugins/GSdx_legacy/GSVertexSW.h deleted file mode 100644 index 0d977658d7..0000000000 --- a/plugins/GSdx_legacy/GSVertexSW.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSVector.h" - -__aligned(struct, 32) GSVertexSW -{ - GSVector4 p, _pad, t, c; - - __forceinline GSVertexSW() {} - __forceinline GSVertexSW(const GSVertexSW& v) {*this = v;} - - __forceinline static GSVertexSW zero() - { - GSVertexSW v; - - v.p = GSVector4::zero(); - v.t = GSVector4::zero(); - v.c = GSVector4::zero(); - - return v; - } - __forceinline void operator = (const GSVertexSW& v) - { - p = v.p; - t = v.t; - c = v.c; - } - - __forceinline void operator += (const GSVertexSW& v) - { - p += v.p; - t += v.t; - c += v.c; - } - - __forceinline friend GSVertexSW operator + (const GSVertexSW& a, const GSVertexSW& b) - { - GSVertexSW v; - - v.p = a.p + b.p; - v.t = a.t + b.t; - v.c = a.c + b.c; - - return v; - } - - __forceinline friend GSVertexSW operator - (const GSVertexSW& a, const GSVertexSW& b) - { - GSVertexSW v; - - v.p = a.p - b.p; - v.t = a.t - b.t; - v.c = a.c - b.c; - - return v; - } - - __forceinline friend GSVertexSW operator * (const GSVertexSW& a, const GSVector4& b) - { - GSVertexSW v; - - v.p = a.p * b; - v.t = a.t * b; - v.c = a.c * b; - - return v; - } - - __forceinline friend GSVertexSW operator / (const GSVertexSW& a, const GSVector4& b) - { - GSVertexSW v; - - v.p = a.p / b; - v.t = a.t / b; - v.c = a.c / b; - - return v; - } - - static bool IsQuad(const GSVertexSW* v, int& tl, int& br) - { - GSVector4 v0 = v[0].p.xyxy(v[0].t); - GSVector4 v1 = v[1].p.xyxy(v[1].t); - GSVector4 v2 = v[2].p.xyxy(v[2].t); - - GSVector4 v01 = v0 == v1; - GSVector4 v12 = v1 == v2; - GSVector4 v02 = v0 == v2; - - GSVector4 vtl, vbr; - - GSVector4 test; - - int i; - - if(v12.allfalse()) - { - test = (v01 ^ v02) & (v01 ^ v02.zwxy()); - vtl = v0; - vbr = v1 + (v2 - v0); - i = 0; - } - else if(v02.allfalse()) - { - test = (v01 ^ v12) & (v01 ^ v12.zwxy()); - vtl = v1; - vbr = v0 + (v2 - v1); - i = 1; - } - else if(v01.allfalse()) - { - test = (v02 ^ v12) & (v02 ^ v12.zwxy()); - vtl = v2; - vbr 
= v0 + (v1 - v2); - i = 2; - } - else - { - return false; - } - - if(!test.alltrue()) - { - return false; - } - - tl = i; - - GSVector4 v3 = v[3].p.xyxy(v[3].t); - GSVector4 v4 = v[4].p.xyxy(v[4].t); - GSVector4 v5 = v[5].p.xyxy(v[5].t); - - GSVector4 v34 = v3 == v4; - GSVector4 v45 = v4 == v5; - GSVector4 v35 = v3 == v5; - - if(v34.allfalse()) - { - test = (v35 ^ v45) & (v35 ^ v45.zwxy()) & (vtl + v5 == v3 + v4) & (vbr == v5); - i = 5; - } - else if(v35.allfalse()) - { - test = (v34 ^ v45) & (v34 ^ v45.zwxy()) & (vtl + v4 == v3 + v5) & (vbr == v4); - i = 4; - } - else if(v45.allfalse()) - { - test = (v34 ^ v35) & (v34 ^ v35.zwxy()) & (vtl + v3 == v5 + v4) & (vbr == v3); - i = 3; - } - else - { - return false; - } - - if(!test.alltrue()) - { - return false; - } - - br = i; - - #if _M_SSE >= 0x500 - - { - // p.z, p.w, t.z, t.w, c.x, c.y, c.z, c.w - - GSVector8 v0 = GSVector8(v[0].p.zwzw(v[0].t), v[0].c); - GSVector8 v1 = GSVector8(v[1].p.zwzw(v[1].t), v[1].c); - GSVector8 v2 = GSVector8(v[2].p.zwzw(v[2].t), v[2].c); - GSVector8 v3 = GSVector8(v[3].p.zwzw(v[3].t), v[3].c); - GSVector8 v4 = GSVector8(v[4].p.zwzw(v[4].t), v[4].c); - GSVector8 v5 = GSVector8(v[5].p.zwzw(v[5].t), v[5].c); - - GSVector8 test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); - - return test.alltrue(); - } - - #else - - v0 = v[0].p.zwzw(v[0].t); - v1 = v[1].p.zwzw(v[1].t); - v2 = v[2].p.zwzw(v[2].t); - v3 = v[3].p.zwzw(v[3].t); - v4 = v[4].p.zwzw(v[4].t); - v5 = v[5].p.zwzw(v[5].t); - - test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); - - if(!test.alltrue()) - { - return false; - } - - v0 = v[0].c; - v1 = v[1].c; - v2 = v[2].c; - v3 = v[3].c; - v4 = v[4].c; - v5 = v[5].c; - - test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); - - if(!test.alltrue()) - { - return false; - } - - return true; - - #endif - } -}; - -#if _M_SSE >= 0x501 - -__aligned(struct, 32) GSVertexSW2 -{ - GSVector4 p, _pad; - GSVector8 tc; - - 
__forceinline GSVertexSW2() {} - __forceinline GSVertexSW2(const GSVertexSW2& v) {*this = v;} - - __forceinline void operator = (const GSVertexSW2& v) - { - p = v.p; - tc = v.tc; - } - - __forceinline friend GSVertexSW2 operator - (const GSVertexSW2& a, const GSVertexSW2& b) - { - GSVertexSW2 v; - - v.p = a.p - b.p; - v.tc = a.tc - b.tc; - - return v; - } -}; - -#endif - diff --git a/plugins/GSdx_legacy/GSVertexTrace.cpp b/plugins/GSdx_legacy/GSVertexTrace.cpp deleted file mode 100644 index 9ef65d86bb..0000000000 --- a/plugins/GSdx_legacy/GSVertexTrace.cpp +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexTrace.h" -#include "GSUtil.h" -#include "GSState.h" - -const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX); - -GSVertexTrace::GSVertexTrace(const GSState* state) - : m_state(state) -{ - m_primclass = GS_INVALID_CLASS; - memset(&m_alpha, 0, sizeof(m_alpha)); - - #define InitUpdate3(P, IIP, TME, FST, COLOR) \ - m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax; - - #define InitUpdate2(P, IIP, TME) \ - InitUpdate3(P, IIP, TME, 0, 0) \ - InitUpdate3(P, IIP, TME, 0, 1) \ - InitUpdate3(P, IIP, TME, 1, 0) \ - InitUpdate3(P, IIP, TME, 1, 1) \ - - #define InitUpdate(P) \ - InitUpdate2(P, 0, 0) \ - InitUpdate2(P, 0, 1) \ - InitUpdate2(P, 1, 0) \ - InitUpdate2(P, 1, 1) \ - - InitUpdate(GS_POINT_CLASS); - InitUpdate(GS_LINE_CLASS); - InitUpdate(GS_TRIANGLE_CLASS); - InitUpdate(GS_SPRITE_CLASS); -} - -void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) -{ - m_primclass = primclass; - - uint32 iip = m_state->PRIM->IIP; - uint32 tme = m_state->PRIM->TME; - uint32 fst = m_state->PRIM->FST; - uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC); - - (this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, count); - - m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); - - m_alpha.valid = false; - - if(m_state->PRIM->TME) - { - const GIFRegTEX1& TEX1 = m_state->m_context->TEX1; - - m_filter.mmag = TEX1.IsMagLinear(); - m_filter.mmin = TEX1.IsMinLinear(); - - if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2 - { - m_filter.linear = m_filter.mmag; - - return; - } - - float K = (float)TEX1.K / 16; - - if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated - { - // LOD = log2(1/|Q|) * (1 << L) + K - - GSVector4::storel(&m_lod, 
m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K); - - if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;} - } - else - { - m_lod.x = K; - m_lod.y = K; - } - - if(m_lod.y <= 0) - { - m_filter.linear = m_filter.mmag; - } - else if(m_lod.x > 0) - { - m_filter.linear = m_filter.mmin; - } - else - { - m_filter.linear = m_filter.mmag | m_filter.mmin; - } - } -} - -template -void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) -{ - const GSDrawingContext* context = m_state->m_context; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - GSVector4 tmin = s_minmax.xxxx(); - GSVector4 tmax = s_minmax.yyyy(); - GSVector4i cmin = GSVector4i::xffffffff(); - GSVector4i cmax = GSVector4i::zero(); - - #if _M_SSE >= 0x401 - - GSVector4i pmin = GSVector4i::xffffffff(); - GSVector4i pmax = GSVector4i::zero(); - - #else - - GSVector4 pmin = s_minmax.xxxx(); - GSVector4 pmax = s_minmax.yyyy(); - - #endif - - const GSVertex* RESTRICT v = (GSVertex*)vertex; - - for(int i = 0; i < count; i += n) - { - if(primclass == GS_POINT_CLASS) - { - GSVector4i c(v[index[i]].m[0]); - - if(color) - { - cmin = cmin.min_u8(c); - cmax = cmax.max_u8(c); - } - - if(tme) - { - if(!fst) - { - GSVector4 stq = GSVector4::cast(c); - - GSVector4 q = stq.wwww(); - - stq = (stq.xyww() * q.rcpnr()).xyww(q); - - tmin = tmin.min(stq); - tmax = tmax.max(stq); - } - else - { - GSVector4i uv(v[index[i]].m[1]); - - GSVector4 st = GSVector4(uv.uph16()).xyxy(); - - tmin = tmin.min(st); - tmax = tmax.max(st); - } - } - - GSVector4i xyzf(v[index[i]].m[1]); - - GSVector4i xy = xyzf.upl16(); - GSVector4i z = xyzf.yyyy(); - - #if _M_SSE >= 0x401 - - GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf)); - - pmin = pmin.min_u32(p); - pmax = pmax.max_u32(p); - - #else - - GSVector4 p = 
GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww()))); - - pmin = pmin.min(p); - pmax = pmax.max(p); - - #endif - } - else if(primclass == GS_LINE_CLASS) - { - GSVector4i c0(v[index[i + 0]].m[0]); - GSVector4i c1(v[index[i + 1]].m[0]); - - if(color) - { - if(iip) - { - cmin = cmin.min_u8(c0.min_u8(c1)); - cmax = cmax.max_u8(c0.max_u8(c1)); - } - else - { - cmin = cmin.min_u8(c1); - cmax = cmax.max_u8(c1); - } - } - - if(tme) - { - if(!fst) - { - GSVector4 stq0 = GSVector4::cast(c0); - GSVector4 stq1 = GSVector4::cast(c1); - - GSVector4 q = stq0.wwww(stq1).rcpnr(); - - stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); - stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1); - - tmin = tmin.min(stq0.min(stq1)); - tmax = tmax.max(stq0.max(stq1)); - } - else - { - GSVector4i uv0(v[index[i + 0]].m[1]); - GSVector4i uv1(v[index[i + 1]].m[1]); - - GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); - GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); - - tmin = tmin.min(st0.min(st1)); - tmax = tmax.max(st0.max(st1)); - } - } - - GSVector4i xyzf0(v[index[i + 0]].m[1]); - GSVector4i xyzf1(v[index[i + 1]].m[1]); - - GSVector4i xy0 = xyzf0.upl16(); - GSVector4i z0 = xyzf0.yyyy(); - GSVector4i xy1 = xyzf1.upl16(); - GSVector4i z1 = xyzf1.yyyy(); - - #if _M_SSE >= 0x401 - - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); - GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); - - pmin = pmin.min_u32(p0.min_u32(p1)); - pmax = pmax.max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - - pmin = pmin.min(p0.min(p1)); - pmax = pmax.max(p0.max(p1)); - - #endif - } - else if(primclass == GS_TRIANGLE_CLASS) - { - GSVector4i c0(v[index[i + 0]].m[0]); - GSVector4i c1(v[index[i + 1]].m[0]); - GSVector4i c2(v[index[i + 2]].m[0]); - - if(color) - { - if(iip) - { - cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1)); - cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1)); - } - else - { - cmin = 
cmin.min_u8(c2); - cmax = cmax.max_u8(c2); - } - } - - if(tme) - { - if(!fst) - { - GSVector4 stq0 = GSVector4::cast(c0); - GSVector4 stq1 = GSVector4::cast(c1); - GSVector4 stq2 = GSVector4::cast(c2); - - GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr(); - - stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0); - stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1); - stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2); - - tmin = tmin.min(stq2).min(stq0.min(stq1)); - tmax = tmax.max(stq2).max(stq0.max(stq1)); - } - else - { - GSVector4i uv0(v[index[i + 0]].m[1]); - GSVector4i uv1(v[index[i + 1]].m[1]); - GSVector4i uv2(v[index[i + 2]].m[1]); - - GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); - GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); - GSVector4 st2 = GSVector4(uv2.uph16()).xyxy(); - - tmin = tmin.min(st2).min(st0.min(st1)); - tmax = tmax.max(st2).max(st0.max(st1)); - } - } - - GSVector4i xyzf0(v[index[i + 0]].m[1]); - GSVector4i xyzf1(v[index[i + 1]].m[1]); - GSVector4i xyzf2(v[index[i + 2]].m[1]); - - GSVector4i xy0 = xyzf0.upl16(); - GSVector4i z0 = xyzf0.yyyy(); - GSVector4i xy1 = xyzf1.upl16(); - GSVector4i z1 = xyzf1.yyyy(); - GSVector4i xy2 = xyzf2.upl16(); - GSVector4i z2 = xyzf2.yyyy(); - - #if _M_SSE >= 0x401 - - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0)); - GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); - GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2)); - - pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1)); - pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww()))); - - pmin = pmin.min(p2).min(p0.min(p1)); - pmax = pmax.max(p2).max(p0.max(p1)); - - #endif - } - else if(primclass == GS_SPRITE_CLASS) - { - GSVector4i c0(v[index[i + 0]].m[0]); - GSVector4i c1(v[index[i + 1]].m[0]); - - if(color) - { - if(iip) - { - cmin = cmin.min_u8(c0.min_u8(c1)); 
- cmax = cmax.max_u8(c0.max_u8(c1)); - } - else - { - cmin = cmin.min_u8(c1); - cmax = cmax.max_u8(c1); - } - } - - if(tme) - { - if(!fst) - { - GSVector4 stq0 = GSVector4::cast(c0); - GSVector4 stq1 = GSVector4::cast(c1); - - GSVector4 q = stq1.wwww().rcpnr(); - - stq0 = (stq0.xyww() * q).xyww(stq1); - stq1 = (stq1.xyww() * q).xyww(stq1); - - tmin = tmin.min(stq0.min(stq1)); - tmax = tmax.max(stq0.max(stq1)); - } - else - { - GSVector4i uv0(v[index[i + 0]].m[1]); - GSVector4i uv1(v[index[i + 1]].m[1]); - - GSVector4 st0 = GSVector4(uv0.uph16()).xyxy(); - GSVector4 st1 = GSVector4(uv1.uph16()).xyxy(); - - tmin = tmin.min(st0.min(st1)); - tmax = tmax.max(st0.max(st1)); - } - } - - GSVector4i xyzf0(v[index[i + 0]].m[1]); - GSVector4i xyzf1(v[index[i + 1]].m[1]); - - GSVector4i xy0 = xyzf0.upl16(); - GSVector4i z0 = xyzf0.yyyy(); - GSVector4i xy1 = xyzf1.upl16(); - GSVector4i z1 = xyzf1.yyyy(); - - #if _M_SSE >= 0x401 - - GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1)); - GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1)); - - pmin = pmin.min_u32(p0.min_u32(p1)); - pmax = pmax.max_u32(p0.max_u32(p1)); - - #else - - GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww()))); - GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww()))); - - pmin = pmin.min(p0.min(p1)); - pmax = pmax.max(p0.max(p1)); - - #endif - } - } - - #if _M_SSE >= 0x401 - - pmin = pmin.blend16<0x30>(pmin.srl32(1)); - pmax = pmax.blend16<0x30>(pmax.srl32(1)); - - #endif - - GSVector4 o(context->XYOFFSET); - GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); - - m_min.p = (GSVector4(pmin) - o) * s; - m_max.p = (GSVector4(pmax) - o) * s; - - if(tme) - { - if(fst) - { - s = GSVector4(1.0f / 16, 1.0f).xxyy(); - } - else - { - s = GSVector4(1 << context->TEX0.TW, 1 << context->TEX0.TH, 1, 1); - } - - m_min.t = tmin * s; - m_max.t = tmax * s; - } - else - { - m_min.t = GSVector4::zero(); - m_max.t = GSVector4::zero(); - } - - if(color) - { - m_min.c = cmin.zzzz().u8to32(); - m_max.c = 
cmax.zzzz().u8to32(); - } - else - { - m_min.c = GSVector4i::zero(); - m_max.c = GSVector4i::zero(); - } -} diff --git a/plugins/GSdx_legacy/GSVertexTrace.h b/plugins/GSdx_legacy/GSVertexTrace.h deleted file mode 100644 index b3ee0b73e6..0000000000 --- a/plugins/GSdx_legacy/GSVertexTrace.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSDrawingContext.h" -#include "GSVertex.h" -#include "GSVertexSW.h" -#include "GSVertexHW.h" -#include "GSFunctionMap.h" - -class GSState; - -__aligned(class, 32) GSVertexTrace : public GSAlignedClass<32> -{ -public: - struct Vertex {GSVector4i c; GSVector4 p, t;}; - struct VertexAlpha {int min, max; bool valid;}; - -protected: - const GSState* m_state; - - static const GSVector4 s_minmax; - - typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count); - - FindMinMaxPtr m_fmm[2][2][2][2][4]; - - template - void FindMinMax(const void* vertex, const uint32* index, int count); - -public: - GS_PRIM_CLASS m_primclass; - - Vertex m_min; - Vertex m_max; - VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it - - union - { - uint32 value; - struct {uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1;}; - struct {uint32 rgba:16, xyzf:4, stq:4;}; - } m_eq; - - union - { - struct {uint32 mmag:1, mmin:1, linear:1;}; - } m_filter; - - GSVector2 m_lod; // x = min, y = max - -public: - GSVertexTrace(const GSState* state); - virtual ~GSVertexTrace() {} - - void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); - - bool IsLinear() const {return m_filter.linear;} -}; diff --git a/plugins/GSdx_legacy/GSWnd.cpp b/plugins/GSdx_legacy/GSWnd.cpp deleted file mode 100644 index ebc230f19a..0000000000 --- a/plugins/GSdx_legacy/GSWnd.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2011-2014 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWnd.h" - -void GSWndGL::PopulateGlFunction() -{ - *(void**)&(gl_ActiveTexture) = GetProcAddress("glActiveTexture"); - *(void**)&(gl_BlendColor) = GetProcAddress("glBlendColor"); - - // Load mandatory function pointer -#define GL_EXT_LOAD_OPT(ext) *(void**)&(ext) = GetProcAddress(#ext, true) - // Load extra function pointer -#define GL_EXT_LOAD(ext) *(void**)&(ext) = GetProcAddress(#ext, true) - - GL_EXT_LOAD(glBlendEquationSeparate); - GL_EXT_LOAD(glBlendFuncSeparate); - GL_EXT_LOAD(glAttachShader); - GL_EXT_LOAD(glBindBuffer); - GL_EXT_LOAD(glBindBufferBase); - GL_EXT_LOAD(glBindBufferRange); - GL_EXT_LOAD(glBindFramebuffer); - GL_EXT_LOAD(glBindSampler); - GL_EXT_LOAD(glBindVertexArray); - GL_EXT_LOAD(glBlitFramebuffer); - GL_EXT_LOAD(glBufferData); - GL_EXT_LOAD(glCheckFramebufferStatus); - GL_EXT_LOAD(glClearBufferfv); - GL_EXT_LOAD(glClearBufferiv); - GL_EXT_LOAD(glClearBufferuiv); - GL_EXT_LOAD(glColorMaski); - GL_EXT_LOAD(glDeleteBuffers); - GL_EXT_LOAD(glDeleteFramebuffers); - GL_EXT_LOAD(glDeleteSamplers); - GL_EXT_LOAD(glDeleteVertexArrays); - GL_EXT_LOAD(glDetachShader); - GL_EXT_LOAD(glDrawBuffers); - GL_EXT_LOAD(glDrawElementsBaseVertex); - GL_EXT_LOAD(glEnableVertexAttribArray); - GL_EXT_LOAD(glFramebufferRenderbuffer); - GL_EXT_LOAD(glFramebufferTexture2D); - GL_EXT_LOAD(glGenBuffers); - GL_EXT_LOAD(glGenFramebuffers); - GL_EXT_LOAD(glGenSamplers); - 
GL_EXT_LOAD(glGenVertexArrays); - GL_EXT_LOAD(glGetBufferParameteriv); - GL_EXT_LOAD(glGetDebugMessageLogARB); - GL_EXT_LOAD_OPT(glDebugMessageCallback); - GL_EXT_LOAD(glGetProgramInfoLog); - GL_EXT_LOAD(glGetProgramiv); - GL_EXT_LOAD(glGetShaderiv); - GL_EXT_LOAD(glGetStringi); - GL_EXT_LOAD(glIsFramebuffer); - GL_EXT_LOAD(glMapBuffer); - GL_EXT_LOAD(glMapBufferRange); - GL_EXT_LOAD(glProgramParameteri); - GL_EXT_LOAD(glSamplerParameterf); - GL_EXT_LOAD(glSamplerParameteri); - GL_EXT_LOAD(glShaderSource); - GL_EXT_LOAD(glUniform1i); - GL_EXT_LOAD(glUnmapBuffer); - GL_EXT_LOAD(glVertexAttribIPointer); - GL_EXT_LOAD(glVertexAttribPointer); - GL_EXT_LOAD(glBufferSubData); - GL_EXT_LOAD(glFenceSync); - GL_EXT_LOAD(glDeleteSync); - GL_EXT_LOAD(glClientWaitSync); - GL_EXT_LOAD(glFlushMappedBufferRange); - // Query object - GL_EXT_LOAD(glBeginQuery); - GL_EXT_LOAD(glEndQuery); - GL_EXT_LOAD(glGetQueryiv); - GL_EXT_LOAD(glGetQueryObjectiv); - GL_EXT_LOAD(glGetQueryObjectuiv); - GL_EXT_LOAD(glQueryCounter); - GL_EXT_LOAD(glGetQueryObjecti64v); - GL_EXT_LOAD(glGetQueryObjectui64v); - GL_EXT_LOAD(glGetInteger64v); - // GL4.0 - GL_EXT_LOAD_OPT(glBlendEquationSeparateiARB); - GL_EXT_LOAD_OPT(glBlendFuncSeparateiARB); - // GL4.1 - GL_EXT_LOAD_OPT(glCreateShaderProgramv); - GL_EXT_LOAD_OPT(glBindProgramPipeline); - GL_EXT_LOAD_OPT(glDeleteProgramPipelines); - GL_EXT_LOAD_OPT(glGenProgramPipelines); - GL_EXT_LOAD_OPT(glGetProgramPipelineiv); - GL_EXT_LOAD_OPT(glGetProgramPipelineInfoLog); - GL_EXT_LOAD_OPT(glValidateProgramPipeline); - GL_EXT_LOAD_OPT(glUseProgramStages); - GL_EXT_LOAD_OPT(glProgramUniform1i); // but no GL4.2 - GL_EXT_LOAD_OPT(glGetProgramBinary); - GL_EXT_LOAD_OPT(glViewportIndexedf); - GL_EXT_LOAD_OPT(glViewportIndexedfv); - GL_EXT_LOAD_OPT(glScissorIndexed); - GL_EXT_LOAD_OPT(glScissorIndexedv); - // NO GL4.1 - GL_EXT_LOAD(glDeleteProgram); - GL_EXT_LOAD(glDeleteShader); - GL_EXT_LOAD(glCompileShader); - GL_EXT_LOAD(glCreateProgram); - 
GL_EXT_LOAD(glCreateShader); - GL_EXT_LOAD(glUseProgram); - GL_EXT_LOAD(glGetShaderInfoLog); - GL_EXT_LOAD(glLinkProgram); - // GL4.2 - GL_EXT_LOAD_OPT(glBindImageTexture); - GL_EXT_LOAD_OPT(glMemoryBarrier); - GL_EXT_LOAD(glTexStorage2D); - // GL4.3 - GL_EXT_LOAD_OPT(glCopyImageSubData); - GL_EXT_LOAD_OPT(glInvalidateTexImage); - GL_EXT_LOAD_OPT(glPushDebugGroup); - GL_EXT_LOAD_OPT(glPopDebugGroup); - GL_EXT_LOAD_OPT(glDebugMessageInsert); - GL_EXT_LOAD_OPT(glDebugMessageControl); - // GL4.4 - GL_EXT_LOAD_OPT(glClearTexImage); - GL_EXT_LOAD_OPT(glBufferStorage); - - // GL4.5 - GL_EXT_LOAD_OPT(glCreateTextures); - GL_EXT_LOAD_OPT(glTextureStorage2D); - GL_EXT_LOAD_OPT(glTextureSubImage2D); - GL_EXT_LOAD_OPT(glCopyTextureSubImage2D); - GL_EXT_LOAD_OPT(glBindTextureUnit); - GL_EXT_LOAD_OPT(glGetTextureImage); - GL_EXT_LOAD_OPT(glTextureParameteri); - - GL_EXT_LOAD_OPT(glCreateFramebuffers); - GL_EXT_LOAD_OPT(glClearNamedFramebufferfv); - GL_EXT_LOAD_OPT(glClearNamedFramebufferuiv); - GL_EXT_LOAD_OPT(glClearNamedFramebufferiv); - GL_EXT_LOAD_OPT(glNamedFramebufferTexture); - GL_EXT_LOAD_OPT(glNamedFramebufferDrawBuffers); - GL_EXT_LOAD_OPT(glNamedFramebufferReadBuffer); - GL_EXT_LOAD_OPT(glCheckNamedFramebufferStatus); - - GL_EXT_LOAD_OPT(glCreateBuffers); - GL_EXT_LOAD_OPT(glNamedBufferStorage); - GL_EXT_LOAD_OPT(glNamedBufferData); - GL_EXT_LOAD_OPT(glNamedBufferSubData); - GL_EXT_LOAD_OPT(glMapNamedBuffer); - GL_EXT_LOAD_OPT(glMapNamedBufferRange); - GL_EXT_LOAD_OPT(glUnmapNamedBuffer); - GL_EXT_LOAD_OPT(glFlushMappedNamedBufferRange); - - GL_EXT_LOAD_OPT(glCreateSamplers); - GL_EXT_LOAD_OPT(glCreateProgramPipelines); - - GL_EXT_LOAD_OPT(glClipControl); - GL_EXT_LOAD_OPT(glTextureBarrier); - - if (glCreateFramebuffers == NULL) { - Emulate_DSA::Init(); - } -} diff --git a/plugins/GSdx_legacy/GSWnd.h b/plugins/GSdx_legacy/GSWnd.h deleted file mode 100644 index fed7a278e2..0000000000 --- a/plugins/GSdx_legacy/GSWnd.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 
(C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "stdafx.h" -#include "GSdx.h" -#include "GSVector.h" - -class GSWnd -{ -protected: - bool m_managed; // set true when we're attached to a 3rdparty window that's amanged by the emulator - -public: - GSWnd() : m_managed(false) {}; - virtual ~GSWnd() {}; - - virtual bool Create(const string& title, int w, int h) = 0; - virtual bool Attach(void* handle, bool managed = true) = 0; - virtual void Detach() = 0; - bool IsManaged() const {return m_managed;} - - virtual void* GetDisplay() = 0; - virtual void* GetHandle() = 0; - virtual GSVector4i GetClientRect() = 0; - virtual bool SetWindowText(const char* title) = 0; - - virtual void AttachContext() {}; - virtual void DetachContext() {}; - - virtual void Show() = 0; - virtual void Hide() = 0; - virtual void HideFrame() = 0; - - virtual void Flip() {}; - virtual void SetVSync(bool enable) {}; - -}; - -class GSWndGL : public GSWnd -{ -protected: - bool m_ctx_attached; - - bool IsContextAttached() const { return m_ctx_attached; } - -public: - GSWndGL() : m_ctx_attached(false) {}; - virtual ~GSWndGL() {}; - - virtual bool Create(const string& title, int w, int h) = 0; - virtual 
bool Attach(void* handle, bool managed = true) = 0; - virtual void Detach() = 0; - - virtual void* GetDisplay() = 0; - virtual void* GetHandle() = 0; - virtual GSVector4i GetClientRect() = 0; - virtual bool SetWindowText(const char* title) = 0; - - virtual void AttachContext() = 0; - virtual void DetachContext() = 0; - virtual void* GetProcAddress(const char* name, bool opt = false) = 0; - - virtual void Show() = 0; - virtual void Hide() = 0; - virtual void HideFrame() = 0; - virtual void Flip() = 0; - virtual void SetVSync(bool enable) = 0; - - void PopulateGlFunction(); -}; diff --git a/plugins/GSdx_legacy/GSWndDX.cpp b/plugins/GSdx_legacy/GSWndDX.cpp deleted file mode 100644 index 52750aa350..0000000000 --- a/plugins/GSdx_legacy/GSWndDX.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWndDX.h" - -#ifdef _WIN32 -GSWndDX::GSWndDX() - : m_hWnd(NULL) - , m_frame(true) -{ -} - -GSWndDX::~GSWndDX() -{ -} - -LRESULT CALLBACK GSWndDX::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) -{ - GSWndDX* wnd = NULL; - - if(message == WM_NCCREATE) - { - wnd = (GSWndDX*)((LPCREATESTRUCT)lParam)->lpCreateParams; - - SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)wnd); - - wnd->m_hWnd = hWnd; - } - else - { - wnd = (GSWndDX*)GetWindowLongPtr(hWnd, GWLP_USERDATA); - } - - if(wnd == NULL) - { - return DefWindowProc(hWnd, message, wParam, lParam); - } - - return wnd->OnMessage(message, wParam, lParam); -} - -LRESULT GSWndDX::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) -{ - switch(message) - { - case WM_CLOSE: - Hide(); - // DestroyWindow(m_hWnd); - return 0; - case WM_DESTROY: - // This kills the emulator when GS is closed, which *really* isn't desired behavior, - // especially in STGS mode (worked in MTGS mode since it only quit the thread, but even - // that wasn't needed). 
- //PostQuitMessage(0); - return 0; - default: - break; - } - - return DefWindowProc((HWND)m_hWnd, message, wParam, lParam); -} - -bool GSWndDX::Create(const string& title, int w, int h) -{ - if(m_hWnd) return false; - - m_managed = true; - - WNDCLASS wc; - - memset(&wc, 0, sizeof(wc)); - - wc.style = CS_HREDRAW | CS_VREDRAW | CS_DBLCLKS; - wc.lpfnWndProc = WndProc; - wc.hInstance = theApp.GetModuleHandle(); - // TODO: wc.hIcon = ; - wc.hCursor = LoadCursor(NULL, IDC_ARROW); - wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); - wc.lpszClassName = "GSWndDX"; - - if(!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) - { - if(!RegisterClass(&wc)) - { - return false; - } - } - - DWORD style = WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_OVERLAPPEDWINDOW | WS_BORDER; - - GSVector4i r; - - GetWindowRect(GetDesktopWindow(), r); - - bool remote = !!GetSystemMetrics(SM_REMOTESESSION); - - if(w <= 0 || h <= 0 || remote) - { - w = r.width() / 3; - h = r.width() / 4; - - if(!remote) - { - w *= 2; - h *= 2; - } - } - - r.left = (r.left + r.right - w) / 2; - r.top = (r.top + r.bottom - h) / 2; - r.right = r.left + w; - r.bottom = r.top + h; - - AdjustWindowRect(r, style, FALSE); - - m_hWnd = CreateWindow(wc.lpszClassName, title.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); - - return m_hWnd != NULL; -} - -bool GSWndDX::Attach(void* handle, bool managed) -{ - // TODO: subclass - - m_hWnd = (HWND)handle; - m_managed = managed; - - return true; -} - -void GSWndDX::Detach() -{ - if(m_hWnd && m_managed) - { - // close the window, since it's under GSdx care. It's not taking messages anyway, and - // that means its big, ugly, and in the way. 
- - DestroyWindow(m_hWnd); - } - - m_hWnd = NULL; - m_managed = true; -} - -GSVector4i GSWndDX::GetClientRect() -{ - GSVector4i r; - - ::GetClientRect(m_hWnd, r); - - return r; -} - -// Returns FALSE if the window has no title, or if th window title is under the strict -// management of the emulator. - -bool GSWndDX::SetWindowText(const char* title) -{ - if(!m_managed) return false; - - ::SetWindowText(m_hWnd, title); - - return m_frame; -} - -void GSWndDX::Show() -{ - if(!m_managed) return; - - SetForegroundWindow(m_hWnd); - ShowWindow(m_hWnd, SW_SHOWNORMAL); - UpdateWindow(m_hWnd); -} - -void GSWndDX::Hide() -{ - if(!m_managed) return; - - ShowWindow(m_hWnd, SW_HIDE); -} - -void GSWndDX::HideFrame() -{ - if(!m_managed) return; - - SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) & ~(WS_CAPTION|WS_THICKFRAME)); - SetWindowPos(m_hWnd, NULL, 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOZORDER | SWP_NOACTIVATE); - SetMenu(m_hWnd, NULL); - - m_frame = false; -} -#endif diff --git a/plugins/GSdx_legacy/GSWndDX.h b/plugins/GSdx_legacy/GSWndDX.h deleted file mode 100644 index 1cc96138e0..0000000000 --- a/plugins/GSdx_legacy/GSWndDX.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWnd.h" - -#ifdef _WIN32 -class GSWndDX : public GSWnd -{ - HWND m_hWnd; - - bool m_frame; - - static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); - virtual LRESULT OnMessage(UINT message, WPARAM wParam, LPARAM lParam); - -public: - GSWndDX(); - virtual ~GSWndDX(); - - bool Create(const string& title, int w, int h); - bool Attach(void* handle, bool managed = true); - void Detach(); - - void* GetDisplay() {return m_hWnd;} - void* GetHandle() {return m_hWnd;} - GSVector4i GetClientRect(); - bool SetWindowText(const char* title); - - void Show(); - void Hide(); - void HideFrame(); -}; -#endif diff --git a/plugins/GSdx_legacy/GSWndEGL.cpp b/plugins/GSdx_legacy/GSWndEGL.cpp deleted file mode 100644 index 3b51fe02d1..0000000000 --- a/plugins/GSdx_legacy/GSWndEGL.cpp +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWndEGL.h" - -#if defined(__linux__) && defined(EGL_SUPPORTED) - -GSWndEGL::GSWndEGL() - : m_NativeWindow(0), m_NativeDisplay(NULL) -{ -} - -void GSWndEGL::CreateContext(int major, int minor) -{ - EGLConfig eglConfig; - EGLint numConfigs = 0; - EGLint contextAttribs[] = - { - EGL_CONTEXT_MAJOR_VERSION_KHR, major, - EGL_CONTEXT_MINOR_VERSION_KHR, minor, -#ifdef ENABLE_OGL_DEBUG - EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR, -#else - // Open Source isn't happy with an unsupported flags... - //EGL_CONTEXT_FLAGS_KHR, GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR, -#endif - EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR, - EGL_NONE - }; - EGLint NullContextAttribs[] = { EGL_NONE }; - EGLint attrList[] = { - EGL_RED_SIZE, 8, - EGL_GREEN_SIZE, 8, - EGL_BLUE_SIZE, 8, - EGL_DEPTH_SIZE, 24, - EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, - EGL_NONE - }; - - eglBindAPI(EGL_OPENGL_API); - - eglChooseConfig(m_eglDisplay, attrList, &eglConfig, 1, &numConfigs); - if ( numConfigs == 0 ) - { - fprintf(stderr,"EGL: Failed to get a frame buffer config! (0x%x)\n", eglGetError() ); - throw GSDXRecoverableError(); - } - - m_eglSurface = eglCreateWindowSurface(m_eglDisplay, eglConfig, m_NativeWindow, NULL); - if ( m_eglSurface == EGL_NO_SURFACE ) - { - fprintf(stderr,"EGL: Failed to get a window surface\n"); - throw GSDXRecoverableError(); - } - - m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs); - EGLint status = eglGetError(); - if (status == EGL_BAD_ATTRIBUTE || status == EGL_BAD_MATCH) { - // Radeon/Gallium don't support advance attribute. Fallback to random value - // Note: Intel gives an EGL_BAD_MATCH. I don't know why but let's by stubborn and retry. 
- fprintf(stderr, "EGL: warning your driver doesn't support advance openGL context attributes\n"); - m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, NullContextAttribs); - status = eglGetError(); - } - if ( m_eglContext == EGL_NO_CONTEXT ) - { - fprintf(stderr,"EGL: Failed to create the context\n"); - fprintf(stderr,"EGL STATUS: %x\n", status); - throw GSDXRecoverableError(); - } - - if ( !eglMakeCurrent(m_eglDisplay, m_eglSurface, m_eglSurface, m_eglContext) ) - { - throw GSDXRecoverableError(); - } -} - -void GSWndEGL::AttachContext() -{ - if (!IsContextAttached()) { - // The setting of the API is local to a thread. This function - // can be called from 2 threads. - eglBindAPI(EGL_OPENGL_API); - - //fprintf(stderr, "Attach the context\n"); - eglMakeCurrent(m_eglDisplay, m_eglSurface, m_eglSurface, m_eglContext); - m_ctx_attached = true; - } -} - -void GSWndEGL::DetachContext() -{ - if (IsContextAttached()) { - //fprintf(stderr, "Detach the context\n"); - eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - m_ctx_attached = false; - } -} - -void GSWndEGL::CheckContext() -{ - fprintf(stderr,"EGL: %s : %s\n", eglQueryString(m_eglDisplay, EGL_VENDOR) , eglQueryString(m_eglDisplay, EGL_VERSION) ); - fprintf(stderr,"EGL: extensions supported: %s\n", eglQueryString(m_eglDisplay, EGL_EXTENSIONS)); -} - -bool GSWndEGL::Attach(void* handle, bool managed) -{ - m_NativeWindow = *(Window*)handle; - m_managed = managed; - - m_NativeDisplay = XOpenDisplay(NULL); - OpenEGLDisplay(); - - CreateContext(3, 3); - - AttachContext(); - - CheckContext(); - - PopulateGlFunction(); - - return true; -} - -void GSWndEGL::Detach() -{ - // Actually the destructor is not called when there is only a GSclose/GSshutdown - // The window still need to be closed - DetachContext(); - eglDestroyContext(m_eglDisplay, m_eglContext); - m_eglContext = NULL; - eglDestroySurface(m_eglDisplay, m_eglSurface); - m_eglSurface = NULL; - CloseEGLDisplay(); - - 
if (m_NativeDisplay) { - XCloseDisplay(m_NativeDisplay); - m_NativeDisplay = NULL; - } -} - -bool GSWndEGL::Create(const string& title, int w, int h) -{ - if(m_NativeWindow) - throw GSDXRecoverableError(); - - if(w <= 0 || h <= 0) { - w = theApp.GetConfig("ModeWidth", 640); - h = theApp.GetConfig("ModeHeight", 480); - } - - m_managed = true; - - // note this part must be only executed when replaying .gs debug file - m_NativeDisplay = XOpenDisplay(NULL); - OpenEGLDisplay(); - - m_NativeWindow = XCreateSimpleWindow(m_NativeDisplay, DefaultRootWindow(m_NativeDisplay), 0, 0, w, h, 0, 0, 0); - XMapWindow (m_NativeDisplay, m_NativeWindow); - - CreateContext(3, 3); - - AttachContext(); - - CheckContext(); - - PopulateGlFunction(); - - if (m_NativeWindow == 0) - throw GSDXRecoverableError(); - - return true; -} - -void* GSWndEGL::GetProcAddress(const char* name, bool opt) -{ - void* ptr = (void*)eglGetProcAddress(name); - if (ptr == NULL) { - fprintf(stderr, "Failed to find %s\n", name); - if (!opt) - throw GSDXRecoverableError(); - } - return ptr; -} - -void* GSWndEGL::GetDisplay() -{ - // note this part must be only executed when replaying .gs debug file - return (void*)m_NativeDisplay; -} - -GSVector4i GSWndEGL::GetClientRect() -{ - unsigned int h = 480; - unsigned int w = 640; - - unsigned int borderDummy; - unsigned int depthDummy; - Window winDummy; - int xDummy; - int yDummy; - - if (!m_NativeDisplay) m_NativeDisplay = XOpenDisplay(NULL); - XGetGeometry(m_NativeDisplay, m_NativeWindow, &winDummy, &xDummy, &yDummy, &w, &h, &borderDummy, &depthDummy); - - return GSVector4i(0, 0, (int)w, (int)h); -} - -// Returns FALSE if the window has no title, or if th window title is under the strict -// management of the emulator. 
- -bool GSWndEGL::SetWindowText(const char* title) -{ - if (!m_managed) return true; - - XTextProperty prop; - - memset(&prop, 0, sizeof(prop)); - - char* ptitle = (char*)title; - if (XStringListToTextProperty(&ptitle, 1, &prop)) { - XSetWMName(m_NativeDisplay, m_NativeWindow, &prop); - } - - XFree(prop.value); - XFlush(m_NativeDisplay); - - return true; -} - -void GSWndEGL::SetVSync(bool enable) -{ - // 0 -> disable vsync - // n -> wait n frame - eglSwapInterval(m_eglDisplay, enable); -} - -void GSWndEGL::Flip() -{ - eglSwapBuffers(m_eglDisplay, m_eglSurface); -} - -void GSWndEGL::Show() -{ - XMapRaised(m_NativeDisplay, m_NativeWindow); - XFlush(m_NativeDisplay); -} - -void GSWndEGL::Hide() -{ - XUnmapWindow(m_NativeDisplay, m_NativeWindow); - XFlush(m_NativeDisplay); -} - -void GSWndEGL::HideFrame() -{ - // TODO -} - -void GSWndEGL::CloseEGLDisplay() -{ - eglReleaseThread(); - eglTerminate(m_eglDisplay); -} - -void GSWndEGL::OpenEGLDisplay() -{ - // Create an EGL display from the native display - m_eglDisplay = eglGetDisplay((EGLNativeDisplayType)m_NativeDisplay); - if ( m_eglDisplay == EGL_NO_DISPLAY ) { - fprintf(stderr,"EGL: Failed to open a display! (0x%x)\n", eglGetError() ); - throw GSDXRecoverableError(); - } - - if ( !eglInitialize(m_eglDisplay, NULL, NULL) ) { - fprintf(stderr,"EGL: Failed to initialize the display! (0x%x)\n", eglGetError() ); - throw GSDXRecoverableError(); - } -} - -#endif diff --git a/plugins/GSdx_legacy/GSWndEGL.h b/plugins/GSdx_legacy/GSWndEGL.h deleted file mode 100644 index 17804bb726..0000000000 --- a/plugins/GSdx_legacy/GSWndEGL.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "GSWnd.h" - -#if defined(__linux__) && defined(EGL_SUPPORTED) -#include -#include -#include - -class GSWndEGL : public GSWndGL -{ - EGLNativeWindowType m_NativeWindow; - EGLNativeDisplayType m_NativeDisplay; - - EGLDisplay m_eglDisplay; - EGLSurface m_eglSurface; - EGLContext m_eglContext; - - void CreateContext(int major, int minor); - void CheckContext(); - - void OpenEGLDisplay(); - void CloseEGLDisplay(); - -public: - GSWndEGL(); - virtual ~GSWndEGL() {}; - - bool Create(const string& title, int w, int h); - bool Attach(void* handle, bool managed = true); - void Detach(); - - void* GetDisplay(); - void* GetHandle() {return (void*)m_NativeWindow;} - GSVector4i GetClientRect(); - bool SetWindowText(const char* title); - - void AttachContext(); - void DetachContext(); - void* GetProcAddress(const char* name, bool opt = false); - - void Show(); - void Hide(); - void HideFrame(); - void Flip(); - void SetVSync(bool enable); -}; - -#endif diff --git a/plugins/GSdx_legacy/GSWndOGL.cpp b/plugins/GSdx_legacy/GSWndOGL.cpp deleted file mode 100644 index 5537f2d555..0000000000 --- a/plugins/GSdx_legacy/GSWndOGL.cpp +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your 
option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWndOGL.h" - -#if defined(__linux__) -GSWndOGL::GSWndOGL() - : m_NativeWindow(0), m_NativeDisplay(NULL), m_context(0), m_swapinterval(NULL) -{ -} - -static bool ctxError = false; -static int ctxErrorHandler(Display *dpy, XErrorEvent *ev) -{ - ctxError = true; - return 0; -} - -void GSWndOGL::CreateContext(int major, int minor) -{ - if ( !m_NativeDisplay || !m_NativeWindow ) - { - fprintf( stderr, "Wrong X11 display/window\n" ); - throw GSDXRecoverableError(); - } - - // Get visual information - static int attrListDbl[] = - { - // GLX_X_RENDERABLE: If True is specified, then only frame buffer configurations that have associated X - // visuals (and can be used to render to Windows and/or GLX pixmaps) will be considered. The default value is GLX_DONT_CARE. 
- GLX_X_RENDERABLE , True, - GLX_RED_SIZE , 8, - GLX_GREEN_SIZE , 8, - GLX_BLUE_SIZE , 8, - GLX_DEPTH_SIZE , 24, - GLX_DOUBLEBUFFER , True, - None - }; - - PFNGLXCHOOSEFBCONFIGPROC glX_ChooseFBConfig = (PFNGLXCHOOSEFBCONFIGPROC) glXGetProcAddress((GLubyte *) "glXChooseFBConfig"); - int fbcount = 0; - GLXFBConfig *fbc = glX_ChooseFBConfig(m_NativeDisplay, DefaultScreen(m_NativeDisplay), attrListDbl, &fbcount); - if (!fbc || fbcount < 1) { - throw GSDXRecoverableError(); - } - - PFNGLXCREATECONTEXTATTRIBSARBPROC glX_CreateContextAttribsARB = (PFNGLXCREATECONTEXTATTRIBSARBPROC)glXGetProcAddress((const GLubyte*) "glXCreateContextAttribsARB"); - if (!glX_CreateContextAttribsARB) { - throw GSDXRecoverableError(); - } - - // Install a dummy handler to handle gracefully (aka not segfault) the support of GL version - int (*oldHandler)(Display*, XErrorEvent*) = XSetErrorHandler(&ctxErrorHandler); - // Be sure the handler is installed - XSync( m_NativeDisplay, false); - - // Create a context - int context_attribs[] = - { - GLX_CONTEXT_MAJOR_VERSION_ARB, major, - GLX_CONTEXT_MINOR_VERSION_ARB, minor, -#ifdef ENABLE_OGL_DEBUG - GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB, -#else - // Open Source isn't happy with an unsupported flags... - //GLX_CONTEXT_FLAGS_ARB, GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR, -#endif - GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, - None - }; - - m_context = glX_CreateContextAttribsARB(m_NativeDisplay, fbc[0], 0, true, context_attribs); - XFree(fbc); - - // Don't forget to reinstall the older Handler - XSetErrorHandler(oldHandler); - - // Get latest error - XSync( m_NativeDisplay, false); - - if (!m_context || ctxError) { - fprintf(stderr, "Failed to create the opengl context. Check your drivers support openGL %d.%d. 
Hint: opensource drivers don't\n", major, minor ); - throw GSDXRecoverableError(); - } -} - -void GSWndOGL::AttachContext() -{ - if (!IsContextAttached()) { - //fprintf(stderr, "Attach the context\n"); - glXMakeCurrent(m_NativeDisplay, m_NativeWindow, m_context); - m_ctx_attached = true; - } -} - -void GSWndOGL::DetachContext() -{ - if (IsContextAttached()) { - //fprintf(stderr, "Detach the context\n"); - glXMakeCurrent(m_NativeDisplay, None, NULL); - m_ctx_attached = false; - } -} - -void GSWndOGL::CheckContext() -{ - int glxMajorVersion, glxMinorVersion; - glXQueryVersion(m_NativeDisplay, &glxMajorVersion, &glxMinorVersion); - if (glXIsDirect(m_NativeDisplay, m_context)) - fprintf(stdout, "glX-Version %d.%d with Direct Rendering\n", glxMajorVersion, glxMinorVersion); - else { - fprintf(stderr, "glX-Version %d.%d with Indirect Rendering !!! It won't support properly opengl\n", glxMajorVersion, glxMinorVersion); - throw GSDXRecoverableError(); - } -} - -bool GSWndOGL::Attach(void* handle, bool managed) -{ - m_NativeWindow = *(Window*)handle; - m_managed = managed; - - m_NativeDisplay = XOpenDisplay(NULL); - - CreateContext(3, 3); - - AttachContext(); - - CheckContext(); - - m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT"); - - PopulateGlFunction(); - - return true; -} - -void GSWndOGL::Detach() -{ - // Actually the destructor is not called when there is only a GSclose/GSshutdown - // The window still need to be closed - DetachContext(); - if (m_context) glXDestroyContext(m_NativeDisplay, m_context); - - if (m_NativeDisplay) { - XCloseDisplay(m_NativeDisplay); - m_NativeDisplay = NULL; - } -} - -bool GSWndOGL::Create(const string& title, int w, int h) -{ - if(m_NativeWindow) - throw GSDXRecoverableError(); - - if(w <= 0 || h <= 0) { - w = theApp.GetConfig("ModeWidth", 640); - h = theApp.GetConfig("ModeHeight", 480); - } - - m_managed = true; - - // note this part must be only executed when replaying .gs debug file 
- m_NativeDisplay = XOpenDisplay(NULL); - - m_NativeWindow = XCreateSimpleWindow(m_NativeDisplay, DefaultRootWindow(m_NativeDisplay), 0, 0, w, h, 0, 0, 0); - XMapWindow (m_NativeDisplay, m_NativeWindow); - - if (m_NativeWindow == 0) - throw GSDXRecoverableError(); - - CreateContext(3, 3); - - AttachContext(); - - CheckContext(); - - m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT"); - - PopulateGlFunction(); - - return true; -} - -void* GSWndOGL::GetProcAddress(const char* name, bool opt) -{ - void* ptr = (void*)glXGetProcAddress((const GLubyte*)name); - if (ptr == NULL) { - fprintf(stderr, "Failed to find %s\n", name); - if (!opt) - throw GSDXRecoverableError(); - } - return ptr; -} - -void* GSWndOGL::GetDisplay() -{ - // note this part must be only executed when replaying .gs debug file - return (void*)m_NativeDisplay; -} - -GSVector4i GSWndOGL::GetClientRect() -{ - unsigned int h = 480; - unsigned int w = 640; - - unsigned int borderDummy; - unsigned int depthDummy; - Window winDummy; - int xDummy; - int yDummy; - - if (!m_NativeDisplay) m_NativeDisplay = XOpenDisplay(NULL); - XGetGeometry(m_NativeDisplay, m_NativeWindow, &winDummy, &xDummy, &yDummy, &w, &h, &borderDummy, &depthDummy); - - return GSVector4i(0, 0, (int)w, (int)h); -} - -// Returns FALSE if the window has no title, or if th window title is under the strict -// management of the emulator. 
- -bool GSWndOGL::SetWindowText(const char* title) -{ - if (!m_managed) return true; - - XTextProperty prop; - - memset(&prop, 0, sizeof(prop)); - - char* ptitle = (char*)title; - if (XStringListToTextProperty(&ptitle, 1, &prop)) { - XSetWMName(m_NativeDisplay, m_NativeWindow, &prop); - } - - XFree(prop.value); - XFlush(m_NativeDisplay); - - return true; -} - -void GSWndOGL::SetVSync(bool enable) -{ - // m_swapinterval uses an integer as parameter - // 0 -> disable vsync - // n -> wait n frame - if (m_swapinterval) m_swapinterval(m_NativeDisplay, m_NativeWindow, (int)enable); -} - -void GSWndOGL::Flip() -{ - glXSwapBuffers(m_NativeDisplay, m_NativeWindow); -} - -void GSWndOGL::Show() -{ - XMapRaised(m_NativeDisplay, m_NativeWindow); - XFlush(m_NativeDisplay); -} - -void GSWndOGL::Hide() -{ - XUnmapWindow(m_NativeDisplay, m_NativeWindow); - XFlush(m_NativeDisplay); -} - -void GSWndOGL::HideFrame() -{ - // TODO -} - -#endif diff --git a/plugins/GSdx_legacy/GSWndOGL.h b/plugins/GSdx_legacy/GSWndOGL.h deleted file mode 100644 index 7f71c049ab..0000000000 --- a/plugins/GSdx_legacy/GSWndOGL.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "GSWnd.h" - -#if defined(__linux__) -#include -#include - -class GSWndOGL final : public GSWndGL -{ - Window m_NativeWindow; - Display* m_NativeDisplay; - GLXContext m_context; - - PFNGLXSWAPINTERVALEXTPROC m_swapinterval; - - void CreateContext(int major, int minor); - void CheckContext(); - -public: - GSWndOGL(); - virtual ~GSWndOGL() {}; - - bool Create(const string& title, int w, int h); - bool Attach(void* handle, bool managed = true); - void Detach(); - - void* GetDisplay(); - void* GetHandle() {return (void*)m_NativeWindow;} - GSVector4i GetClientRect(); - bool SetWindowText(const char* title); - - void AttachContext(); - void DetachContext(); - void* GetProcAddress(const char* name, bool opt = false); - - void Show(); - void Hide(); - void HideFrame(); - void Flip(); - void SetVSync(bool enable); -}; - -#endif diff --git a/plugins/GSdx_legacy/GSWndWGL.cpp b/plugins/GSdx_legacy/GSWndWGL.cpp deleted file mode 100644 index e7af7dcae1..0000000000 --- a/plugins/GSdx_legacy/GSWndWGL.cpp +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSWndWGL.h" - -#ifdef _WIN32 -GSWndWGL::GSWndWGL() - : m_NativeWindow(NULL), m_NativeDisplay(NULL), m_context(NULL) -{ -} - -// Used by GSReplay. Perhaps the stuff used by GSReplay can be moved out? That way all -// the GSOpen 1 stuff can be removed. But that'll take a bit of thinking. -LRESULT CALLBACK GSWndWGL::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) -{ - switch (message) - { - case WM_CLOSE: - // This takes place before GSClose, so don't destroy the Window so we can clean up. - ShowWindow(hWnd, SW_HIDE); - // DestroyWindow(hWnd); - return 0; - default: - return DefWindowProc(hWnd, message, wParam, lParam); - } -} - - -bool GSWndWGL::CreateContext(int major, int minor) -{ - if ( !m_NativeDisplay || !m_NativeWindow ) - { - fprintf( stderr, "Wrong display/window\n" ); - exit(1); - } - - // GL2 context are quite easy but we need GL3 which is another painful story... - m_context = wglCreateContext(m_NativeDisplay); - if (!m_context) { - fprintf(stderr, "Failed to create a 2.0 context\n"); - return false; - } - - // FIXME test it - // Note: albeit every tutorial said that we need an opengl context to use the GL function wglCreateContextAttribsARB - // On linux it works without the extra temporary context, not sure the limitation still applied - if (major >= 3) { - AttachContext(); - - // Create a context - int context_attribs[] = - { - WGL_CONTEXT_MAJOR_VERSION_ARB, major, - WGL_CONTEXT_MINOR_VERSION_ARB, minor, - // FIXME : Request a debug context to ease opengl development - // Note: don't support deprecated feature (pre openg 3.1) - //GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB | GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, - WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB -#ifdef ENABLE_OGL_DEBUG - | WGL_CONTEXT_DEBUG_BIT_ARB -#else - | GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR -#endif - , - WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, 
- 0 - }; - - PFNWGLCREATECONTEXTATTRIBSARBPROC wglCreateContextAttribsARB = (PFNWGLCREATECONTEXTATTRIBSARBPROC)wglGetProcAddress("wglCreateContextAttribsARB"); - if (!wglCreateContextAttribsARB) { - fprintf(stderr, "Failed to init wglCreateContextAttribsARB function pointer\n"); - return false; - } - - HGLRC context30 = wglCreateContextAttribsARB(m_NativeDisplay, NULL, context_attribs); - if (!context30) { - fprintf(stderr, "Failed to create a 3.x context\n"); - return false; - } - - DetachContext(); - wglDeleteContext(m_context); - - m_context = context30; - fprintf(stderr, "3.x GL context successfully created\n"); - } - - return true; -} - -void GSWndWGL::AttachContext() -{ - if (!IsContextAttached()) { - wglMakeCurrent(m_NativeDisplay, m_context); - m_ctx_attached = true; - } -} - -void GSWndWGL::DetachContext() -{ - if (IsContextAttached()) { - wglMakeCurrent(NULL, NULL); - m_ctx_attached = false; - } -} - -//TODO: DROP ??? -void GSWndWGL::CheckContext() -{ -#if 0 - int glxMajorVersion, glxMinorVersion; - glXQueryVersion(m_NativeDisplay, &glxMajorVersion, &glxMinorVersion); - if (glXIsDirect(m_NativeDisplay, m_context)) - fprintf(stderr, "glX-Version %d.%d with Direct Rendering\n", glxMajorVersion, glxMinorVersion); - else - fprintf(stderr, "glX-Version %d.%d with Indirect Rendering !!! 
It won't support properly opengl\n", glxMajorVersion, glxMinorVersion); -#endif -} - -bool GSWndWGL::Attach(void* handle, bool managed) -{ - m_NativeWindow = (HWND)handle; - m_managed = managed; - - if (!OpenWGLDisplay()) return false; - - if (!CreateContext(3, 3)) return false; - - AttachContext(); - - CheckContext(); - - m_swapinterval = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); - - PopulateGlFunction(); - - UpdateWindow(m_NativeWindow); - - return true; -} - -void GSWndWGL::Detach() -{ - // Actually the destructor is not called when there is only a GSclose/GSshutdown - // The window still need to be closed - DetachContext(); - - if (m_context) wglDeleteContext(m_context); - m_context = NULL; - - CloseWGLDisplay(); - - // Used by GSReplay. - if (m_NativeWindow && m_managed) - { - DestroyWindow(m_NativeWindow); - m_NativeWindow = NULL; - } - -} - -bool GSWndWGL::OpenWGLDisplay() -{ - GLuint PixelFormat; // Holds The Results After Searching For A Match - PIXELFORMATDESCRIPTOR pfd = // pfd Tells Windows How We Want Things To Be - - { - sizeof(PIXELFORMATDESCRIPTOR), // Size Of This Pixel Format Descriptor - 1, // Version Number - PFD_DRAW_TO_WINDOW | // Format Must Support Window - PFD_SUPPORT_OPENGL | // Format Must Support OpenGL - PFD_DOUBLEBUFFER, // Must Support Double Buffering - PFD_TYPE_RGBA, // Request An RGBA Format - 32, // Select Our Color Depth - 0, 0, 0, 0, 0, 0, // Color Bits Ignored - 0, // 8bit Alpha Buffer - 0, // Shift Bit Ignored - 0, // No Accumulation Buffer - 0, 0, 0, 0, // Accumulation Bits Ignored - 24, // 24Bit Z-Buffer (Depth Buffer) - 8, // 8bit Stencil Buffer - 0, // No Auxiliary Buffer - PFD_MAIN_PLANE, // Main Drawing Layer - 0, // Reserved - 0, 0, 0 // Layer Masks Ignored - }; - - m_NativeDisplay = GetDC(m_NativeWindow); - if (!m_NativeDisplay) - { - MessageBox(NULL, "(1) Can't Create A GL Device Context.", "ERROR", MB_OK | MB_ICONEXCLAMATION); - return false; - } - PixelFormat = 
ChoosePixelFormat(m_NativeDisplay, &pfd); - if (!PixelFormat) - { - MessageBox(NULL, "(2) Can't Find A Suitable PixelFormat.", "ERROR", MB_OK | MB_ICONEXCLAMATION); - return false; - } - - if (!SetPixelFormat(m_NativeDisplay, PixelFormat, &pfd)) - { - MessageBox(NULL, "(3) Can't Set The PixelFormat.", "ERROR", MB_OK | MB_ICONEXCLAMATION); - return false; - } - - return true; -} - -void GSWndWGL::CloseWGLDisplay() -{ - if (m_NativeDisplay && !ReleaseDC(m_NativeWindow, m_NativeDisplay)) // Are We Able To Release The DC - { - MessageBox(NULL, "Release Device Context Failed.", "SHUTDOWN ERROR", MB_OK | MB_ICONINFORMATION); - } - m_NativeDisplay = NULL; // Set DC To NULL -} - -//TODO: GSopen 1 => Drop? -// Used by GSReplay. At least for now. -// More or less copy pasted from GSWndDX::Create and GSWndWGL::Attach with a few -// modifications -bool GSWndWGL::Create(const string& title, int w, int h) -{ - if(m_NativeWindow) return false; - - m_managed = true; - - WNDCLASS wc; - - memset(&wc, 0, sizeof(wc)); - - wc.style = CS_HREDRAW | CS_VREDRAW | CS_DBLCLKS | CS_OWNDC; - wc.lpfnWndProc = WndProc; - wc.hInstance = theApp.GetModuleHandle(); - wc.hCursor = LoadCursor(NULL, IDC_ARROW); - wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); - wc.lpszClassName = "GSWndOGL"; - - if (!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) - { - if (!RegisterClass(&wc)) - { - return false; - } - } - - DWORD style = WS_CLIPCHILDREN | WS_CLIPSIBLINGS | WS_OVERLAPPEDWINDOW | WS_BORDER; - - GSVector4i r; - - GetWindowRect(GetDesktopWindow(), r); - - // Old GSOpen ModeWidth and ModeHeight are not necessary with this. 
- bool remote = !!GetSystemMetrics(SM_REMOTESESSION); - - if (w <= 0 || h <= 0 || remote) - { - w = r.width() / 3; - h = r.width() / 4; - - if (!remote) - { - w *= 2; - h *= 2; - } - } - - r.left = (r.left + r.right - w) / 2; - r.top = (r.top + r.bottom - h) / 2; - r.right = r.left + w; - r.bottom = r.top + h; - - AdjustWindowRect(r, style, FALSE); - - m_NativeWindow = CreateWindow(wc.lpszClassName, title.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); - - if (m_NativeWindow == NULL) return false; - - if (!OpenWGLDisplay()) return false; - - if (!CreateContext(3, 3)) return false; - - AttachContext(); - - m_swapinterval = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); - - PopulateGlFunction(); - - return true; - -} - -//Same as DX -GSVector4i GSWndWGL::GetClientRect() -{ - GSVector4i r; - - ::GetClientRect(m_NativeWindow, r); - - return r; -} - -void* GSWndWGL::GetProcAddress(const char* name, bool opt) -{ - void* ptr = (void*)wglGetProcAddress(name); - if (ptr == NULL) { - fprintf(stderr, "Failed to find %s\n", name); - if (!opt) - throw GSDXRecoverableError(); - } - return ptr; -} - -//TODO: check extensions supported or not -//FIXME : extension allocation -void GSWndWGL::SetVSync(bool enable) -{ - // m_swapinterval uses an integer as parameter - // 0 -> disable vsync - // n -> wait n frame - if (m_swapinterval) m_swapinterval((int)enable); -} - -void GSWndWGL::Flip() -{ - SwapBuffers(m_NativeDisplay); -} - -void GSWndWGL::Show() -{ - if (!m_managed) return; - - // Used by GSReplay - SetForegroundWindow(m_NativeWindow); - ShowWindow(m_NativeWindow, SW_SHOWNORMAL); - UpdateWindow(m_NativeWindow); -} - -void GSWndWGL::Hide() -{ -} - -void GSWndWGL::HideFrame() -{ -} - -// Returns FALSE if the window has no title, or if th window title is under the strict -// management of the emulator. - -bool GSWndWGL::SetWindowText(const char* title) -{ - if (!m_managed) return false; - - // Used by GSReplay. 
- ::SetWindowText(m_NativeWindow, title); - - return true; -} - - -#endif diff --git a/plugins/GSdx_legacy/GSWndWGL.h b/plugins/GSdx_legacy/GSWndWGL.h deleted file mode 100644 index 4c477b7c6c..0000000000 --- a/plugins/GSdx_legacy/GSWndWGL.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2007-2012 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "GSWnd.h" - -#ifdef _WIN32 - -class GSWndWGL : public GSWndGL -{ - HWND m_NativeWindow; - HDC m_NativeDisplay; - HGLRC m_context; - - PFNWGLSWAPINTERVALEXTPROC m_swapinterval; - - bool CreateContext(int major, int minor); - void CheckContext(); - - void CloseWGLDisplay(); - bool OpenWGLDisplay(); - - static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); - -public: - GSWndWGL(); - virtual ~GSWndWGL() {}; - - bool Create(const string& title, int w, int h); - bool Attach(void* handle, bool managed = true); - void Detach(); - - void* GetDisplay() {return m_NativeWindow;} - void* GetHandle() {return m_NativeWindow;} - GSVector4i GetClientRect(); - bool SetWindowText(const char* title); - - void AttachContext(); - void DetachContext(); - void* GetProcAddress(const char* name, bool opt); - - void Show(); - void Hide(); - void HideFrame(); - void Flip(); - void SetVSync(bool enable); -}; - -#endif diff --git a/plugins/GSdx_legacy/GSdx.cpp b/plugins/GSdx_legacy/GSdx.cpp deleted file mode 100644 index bd2a7ec564..0000000000 --- a/plugins/GSdx_legacy/GSdx.cpp +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSdx.h" -#include "GS.h" - -static void* s_hModule; - -#ifdef _WIN32 - -BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) -{ - switch(ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - s_hModule = hModule; - case DLL_THREAD_ATTACH: - case DLL_THREAD_DETACH: - case DLL_PROCESS_DETACH: - break; - } - - return TRUE; -} - -bool GSdxApp::LoadResource(int id, vector& buff, const char* type) -{ - buff.clear(); - HRSRC hRsrc = FindResource((HMODULE)s_hModule, MAKEINTRESOURCE(id), type != NULL ? type : RT_RCDATA); - if(!hRsrc) return false; - HGLOBAL hGlobal = ::LoadResource((HMODULE)s_hModule, hRsrc); - if(!hGlobal) return false; - DWORD size = SizeofResource((HMODULE)s_hModule, hRsrc); - if(!size) return false; - buff.resize(size); - memcpy(buff.data(), LockResource(hGlobal), size); - return true; -} - -#else - -bool GSdxApp::LoadResource(int id, vector& buff, const char* type) -{ - buff.clear(); - printf("LoadResource not implemented\n"); - return false; -} - -size_t GSdxApp::GetPrivateProfileString(const char* lpAppName, const char* lpKeyName, const char* lpDefault, char* lpReturnedString, size_t nSize, const char* lpFileName) -{ - BuildConfigurationMap(lpFileName); - - std::string key(lpKeyName); - std::string value = m_configuration_map[key]; - if (value.empty()) { - // save the value for futur call - m_configuration_map[key] = std::string(lpDefault); - strcpy(lpReturnedString, lpDefault); - } else - strcpy(lpReturnedString, value.c_str()); - - return 0; -} - -bool GSdxApp::WritePrivateProfileString(const char* lpAppName, const char* lpKeyName, const char* pString, const char* lpFileName) -{ - BuildConfigurationMap(lpFileName); - - std::string key(lpKeyName); - std::string value(pString); - m_configuration_map[key] = value; - - // Save config to a file - FILE* f = fopen(lpFileName, "w"); - - if (f == NULL) return false; // FIXME print a nice message 
- - map::iterator it; - for (it = m_configuration_map.begin(); it != m_configuration_map.end(); ++it) { - // Do not save the inifile key which is not an option - if (it->first.compare("inifile") == 0) continue; - - if (!it->second.empty()) - fprintf(f, "%s = %s\n", it->first.c_str(), it->second.c_str()); - } - fclose(f); - - return false; -} - -int GSdxApp::GetPrivateProfileInt(const char* lpAppName, const char* lpKeyName, int nDefault, const char* lpFileName) -{ - BuildConfigurationMap(lpFileName); - - std::string value = m_configuration_map[std::string(lpKeyName)]; - if (value.empty()) { - // save the value for futur call - SetConfig(lpKeyName, nDefault); - return nDefault; - } else - return atoi(value.c_str()); -} -#endif - -GSdxApp theApp; - -GSdxApp::GSdxApp() -{ - m_ini = "inis/GSdx.ini"; - m_section = "Settings"; - -#ifdef _WIN32 - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL_HW), "OpenGL", "Hardware")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX1011_HW), "Direct3D11", "Hardware")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX9_HW), "Direct3D9", "Hardware")); - - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL_SW), "OpenGL", "Software")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX1011_SW), "Direct3D11", "Software")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX9_SW), "Direct3D9", "Software")); - -#ifdef _DEBUG - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX9_Null), "Direct3D9", "Null")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX1011_Null), "Direct3D11", "Null")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::Null_SW), "Null", "Software")); -#endif -#else // Linux - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL_HW), "OpenGL", "Hardware")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL_SW), "OpenGL", "Software")); 
-#endif - - // The null renderer goes third, it has use for benchmarking purposes in a release build - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::Null_Null), "None", "Core Benchmark")); - -#ifdef ENABLE_OPENCL - // OpenCL stuff goes last - // FIXME openCL isn't attached to a device (could be impacted by the window management stuff however) - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX9_OpenCL), "Direct3D9", "OpenCL")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX1011_OpenCL), "Direct3D11", "OpenCL")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::Null_OpenCL), "Null", "OpenCL")); - m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL_OpenCL), "OpenGL", "OpenCL")); -#endif - - m_gs_interlace.push_back(GSSetting(0, "None", "")); - m_gs_interlace.push_back(GSSetting(1, "Weave tff", "saw-tooth")); - m_gs_interlace.push_back(GSSetting(2, "Weave bff", "saw-tooth")); - m_gs_interlace.push_back(GSSetting(3, "Bob tff", "use blend if shaking")); - m_gs_interlace.push_back(GSSetting(4, "Bob bff", "use blend if shaking")); - m_gs_interlace.push_back(GSSetting(5, "Blend tff", "slight blur, 1/2 fps")); - m_gs_interlace.push_back(GSSetting(6, "Blend bff", "slight blur, 1/2 fps")); - m_gs_interlace.push_back(GSSetting(7, "Auto", "")); - - m_gs_aspectratio.push_back(GSSetting(0, "Stretch", "")); - m_gs_aspectratio.push_back(GSSetting(1, "4:3", "")); - m_gs_aspectratio.push_back(GSSetting(2, "16:9", "")); - - m_gs_upscale_multiplier.push_back(GSSetting(1, "Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(2, "2x Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(3, "3x Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(4, "4x Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(5, "5x Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(6, "6x Native", "")); - m_gs_upscale_multiplier.push_back(GSSetting(8, "8x Native", "")); - 
m_gs_upscale_multiplier.push_back(GSSetting(0, "Custom", "")); - - m_gs_max_anisotropy.push_back(GSSetting(0, "Off", "")); - m_gs_max_anisotropy.push_back(GSSetting(2, "2x", "")); - m_gs_max_anisotropy.push_back(GSSetting(4, "4x", "")); - m_gs_max_anisotropy.push_back(GSSetting(8, "8x", "")); - m_gs_max_anisotropy.push_back(GSSetting(16, "16x", "")); - - m_gs_filter.push_back(GSSetting(0, "Nearest", "")); - m_gs_filter.push_back(GSSetting(1, "Bilinear", "Forced")); - m_gs_filter.push_back(GSSetting(2, "Bilinear", "PS2")); - - m_gs_gl_ext.push_back(GSSetting(-1, "Auto", "")); - m_gs_gl_ext.push_back(GSSetting(0, "Force-Disabled", "")); - m_gs_gl_ext.push_back(GSSetting(1, "Force-Enabled", "")); - - m_gs_hack.push_back(GSSetting(0, "Off", "")); - m_gs_hack.push_back(GSSetting(1, "Half", "")); - m_gs_hack.push_back(GSSetting(2, "Full", "")); - - m_gs_crc_level.push_back(GSSetting(0 , "None", "Debug")); - m_gs_crc_level.push_back(GSSetting(1 , "Minimum", "Debug")); - m_gs_crc_level.push_back(GSSetting(2 , "Partial", "OpenGL Recommended")); - m_gs_crc_level.push_back(GSSetting(3 , "Full", "Safest")); - m_gs_crc_level.push_back(GSSetting(4 , "Aggressive", "")); - - m_gs_acc_blend_level.push_back(GSSetting(0, "None", "Fastest")); - m_gs_acc_blend_level.push_back(GSSetting(1, "Basic", "Recommended low-end PC")); - m_gs_acc_blend_level.push_back(GSSetting(2, "Medium", "")); - m_gs_acc_blend_level.push_back(GSSetting(3, "High", "Recommended high-end PC")); - m_gs_acc_blend_level.push_back(GSSetting(4, "Full", "Very Slow")); - m_gs_acc_blend_level.push_back(GSSetting(5, "Ultra", "Ultra Slow")); - - m_gs_tv_shaders.push_back(GSSetting(0, "None", "")); - m_gs_tv_shaders.push_back(GSSetting(1, "Scanline filter", "")); - m_gs_tv_shaders.push_back(GSSetting(2, "Diagonal filter", "")); - m_gs_tv_shaders.push_back(GSSetting(3, "Triangular filter", "")); - m_gs_tv_shaders.push_back(GSSetting(4, "Wave filter", "")); - - m_gpu_renderers.push_back(GSSetting(0, "Direct3D9 (Software)", 
"")); - m_gpu_renderers.push_back(GSSetting(1, "Direct3D11 (Software)", "")); - m_gpu_renderers.push_back(GSSetting(2, "SDL 1.3 (Software)", "")); - m_gpu_renderers.push_back(GSSetting(3, "Null (Software)", "")); - //m_gpu_renderers.push_back(GSSetting(4, "Null (Null)", "")); - - m_gpu_filter.push_back(GSSetting(0, "Nearest", "")); - m_gpu_filter.push_back(GSSetting(1, "Bilinear (polygons only)", "")); - m_gpu_filter.push_back(GSSetting(2, "Bilinear", "")); - - m_gpu_dithering.push_back(GSSetting(0, "Disabled", "")); - m_gpu_dithering.push_back(GSSetting(1, "Auto", "")); - - m_gpu_aspectratio.push_back(GSSetting(0, "Stretch", "")); - m_gpu_aspectratio.push_back(GSSetting(1, "4:3", "")); - m_gpu_aspectratio.push_back(GSSetting(2, "16:9", "")); - - m_gpu_scale.push_back(GSSetting(0 | (0 << 2), "H x 1 - V x 1", "")); - m_gpu_scale.push_back(GSSetting(1 | (0 << 2), "H x 2 - V x 1", "")); - m_gpu_scale.push_back(GSSetting(0 | (1 << 2), "H x 1 - V x 2", "")); - m_gpu_scale.push_back(GSSetting(1 | (1 << 2), "H x 2 - V x 2", "")); - m_gpu_scale.push_back(GSSetting(2 | (1 << 2), "H x 4 - V x 2", "")); - m_gpu_scale.push_back(GSSetting(1 | (2 << 2), "H x 2 - V x 4", "")); - m_gpu_scale.push_back(GSSetting(2 | (2 << 2), "H x 4 - V x 4", "")); -} - -#ifdef __linux__ -void GSdxApp::ReloadConfig() -{ - if (m_configuration_map.empty()) return; - - auto file = m_configuration_map.find("inifile"); - if (file == m_configuration_map.end()) return; - - // A map was built so reload it - std::string filename = file->second; - m_configuration_map.clear(); - BuildConfigurationMap(filename.c_str()); -} - -void GSdxApp::BuildConfigurationMap(const char* lpFileName) -{ - // Check if the map was already built - std::string inifile_value(lpFileName); - if ( inifile_value.compare(m_configuration_map["inifile"]) == 0 ) return; - m_configuration_map["inifile"] = inifile_value; - - // Load config from file - char value[256]; - char key[256]; - FILE* f = fopen(lpFileName, "r"); - - if (f == NULL) 
return; // FIXME print a nice message - - while( fscanf(f, "%255s = %255s\n", key, value) != EOF ) { - std::string key_s(key); - std::string value_s(value); - m_configuration_map[key_s] = value_s; - } - - fclose(f); -} -#endif - -void* GSdxApp::GetModuleHandlePtr() -{ - return s_hModule; -} - -void GSdxApp::SetConfigDir(const char* dir) -{ - if( dir == NULL ) - { - m_ini = "inis/GSdx.ini"; - } - else - { - m_ini = dir; - - if(m_ini[m_ini.length() - 1] != DIRECTORY_SEPARATOR) - { - m_ini += DIRECTORY_SEPARATOR; - } - - m_ini += "GSdx.ini"; - } -} - -string GSdxApp::GetConfig(const char* entry, const char* value) -{ - char buff[4096] = {0}; - - GetPrivateProfileString(m_section.c_str(), entry, value, buff, countof(buff), m_ini.c_str()); - - return string(buff); -} - -void GSdxApp::SetConfig(const char* entry, const char* value) -{ - WritePrivateProfileString(m_section.c_str(), entry, value, m_ini.c_str()); -} - -int GSdxApp::GetConfig(const char* entry, int value) -{ - return GetPrivateProfileInt(m_section.c_str(), entry, value, m_ini.c_str()); -} - -void GSdxApp::SetConfig(const char* entry, int value) -{ - char buff[32] = {0}; - - sprintf(buff, "%d", value); - - SetConfig(entry, buff); -} diff --git a/plugins/GSdx_legacy/GSdx.def b/plugins/GSdx_legacy/GSdx.def deleted file mode 100644 index 7ab5563e41..0000000000 --- a/plugins/GSdx_legacy/GSdx.def +++ /dev/null @@ -1,71 +0,0 @@ -; GSdx.def : Declares the module parameters for the DLL. 
- -EXPORTS - ; Explicit exports can go here - PS2EgetLibType - PS2EgetLibName - PS2EgetLibVersion2 - PS2EgetCpuPlatform - GSsetBaseMem - GSinit - GSshutdown - GSopen - GSopen2 - GSclose - GSreset - GSwriteCSR - GSgifSoftReset - GSgifTransfer - GSgifTransfer1 - GSgifTransfer2 - GSgifTransfer3 - GSvsync - GSmakeSnapshot - GSkeyEvent - GSfreeze - GSconfigure - GStest - GSabout - GSinitReadFIFO - GSreadFIFO - GSinitReadFIFO2 - GSreadFIFO2 - GSirqCallback - GSsetupRecording - GSsetGameCRC - GSsetFrameSkip - GSsetFrameLimit - GSsetVsync - GSsetExclusive - GSsetSettingsDir - GSgetLastTag - GSReplay - GSBenchmark - GSgetTitleInfo2 - PSEgetLibType - PSEgetLibName - PSEgetLibVersion - GPUinit - GPUshutdown - GPUopen - GPUclose - GPUconfigure - GPUabout - GPUtest - GPUwriteData - GPUwriteStatus - GPUreadData - GPUreadStatus - GPUdmaChain - GPUgetMode - GPUsetMode - GPUupdateLace - GPUmakeSnapshot - GPUwriteDataMem - GPUreadDataMem - GPUdisplayText - GPUdisplayFlags - GPUfreeze - GPUshowScreenPic - GPUgetScreenPic - GPUcursor diff --git a/plugins/GSdx_legacy/GSdx.gcc.workspace b/plugins/GSdx_legacy/GSdx.gcc.workspace deleted file mode 100644 index 12ac2a87f4..0000000000 --- a/plugins/GSdx_legacy/GSdx.gcc.workspace +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/plugins/GSdx_legacy/GSdx.h b/plugins/GSdx_legacy/GSdx.h deleted file mode 100644 index 5ea4201c4c..0000000000 --- a/plugins/GSdx_legacy/GSdx.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "GSSetting.h" - -class GSdxApp -{ - std::string m_ini; - std::string m_section; -#ifdef __linux__ - std::map< std::string, std::string > m_configuration_map; -#endif - -public: - GSdxApp(); - - void* GetModuleHandlePtr(); - -#ifdef _WIN32 - HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();} -#endif - -#ifdef __linux__ - void BuildConfigurationMap(const char* lpFileName); - void ReloadConfig(); - - size_t GetPrivateProfileString(const char* lpAppName, const char* lpKeyName, const char* lpDefault, char* lpReturnedString, size_t nSize, const char* lpFileName); - bool WritePrivateProfileString(const char* lpAppName, const char* lpKeyName, const char* pString, const char* lpFileName); - int GetPrivateProfileInt(const char* lpAppName, const char* lpKeyName, int nDefault, const char* lpFileName); -#endif - - bool LoadResource(int id, vector& buff, const char* type = NULL); - - string GetConfig(const char* entry, const char* value); - void SetConfig(const char* entry, const char* value); - int GetConfig(const char* entry, int value); - void SetConfig(const char* entry, int value); - - void SetConfigDir(const char* dir); - - vector m_gs_renderers; - vector m_gs_interlace; - vector m_gs_aspectratio; - vector m_gs_upscale_multiplier; - vector m_gs_max_anisotropy; - vector m_gs_filter; - vector m_gs_gl_ext; - vector m_gs_hack; - vector m_gs_crc_level; - vector m_gs_acc_blend_level; - vector m_gs_tv_shaders; - - vector m_gpu_renderers; - vector m_gpu_filter; - vector m_gpu_dithering; - vector m_gpu_aspectratio; - vector m_gpu_scale; -}; - -struct GSDXError {}; -struct GSDXRecoverableError : 
GSDXError {}; - -extern GSdxApp theApp; diff --git a/plugins/GSdx_legacy/GSdx.props b/plugins/GSdx_legacy/GSdx.props deleted file mode 100644 index a2fffcb395..0000000000 --- a/plugins/GSdx_legacy/GSdx.props +++ /dev/null @@ -1,20 +0,0 @@ - - - <_PropertySheetDisplayName>GSdx - $(ProjectName)-$(SSEtype) - - - - Level4 - 4995;4324;%(DisableSpecificWarnings) - - - JITProfiling.lib;d3d11_beta.lib;d3dx11.lib;d3d10.lib;d3d10_1.lib;d3dx10.lib;d3d9.lib;d3dx9.lib;ddraw.lib;dxguid.lib;winmm.lib;strmiids.lib;xinput.lib;cg.lib;cgGL.lib;glut32.lib;glew32.lib;%(AdditionalDependencies) - ./vtune;%(AdditionalLibraryDirectories) - d3d9.dll;d3dx9_41.dll;d3d10.dll;d3d10_1.dll;d3dx10_41.dll;d3d11.dll;d3d11_beta.dll;d3dx11_41.dll;%(DelayLoadDLLs) - - - "$(SolutionDir)common\vsprops\preBuild.cmd" "$(ProjectDir)." - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSdx.rc b/plugins/GSdx_legacy/GSdx.rc deleted file mode 100644 index eb1f09d72b..0000000000 --- a/plugins/GSdx_legacy/GSdx.rc +++ /dev/null @@ -1,375 +0,0 @@ -// Microsoft Visual C++ generated resource script. -// -#include "resource.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. 
-// -#ifndef APSTUDIO_INVOKED -#include "targetver.h" -#endif -#define APSTUDIO_HIDDEN_SYMBOLS -#include "windows.h" -#undef APSTUDIO_HIDDEN_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// English (United States) resources - -#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US -#pragma code_page(1252) - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "resource.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#ifndef APSTUDIO_INVOKED\r\n" - "#include ""targetver.h""\r\n" - "#endif\r\n" - "#define APSTUDIO_HIDDEN_SYMBOLS\r\n" - "#include ""windows.h""\r\n" - "#undef APSTUDIO_HIDDEN_SYMBOLS\r\n" - "\0" -END - -3 TEXTINCLUDE -BEGIN - "#include ""res/tfx.fx""\r\n" - "#include ""res/convert.fx""\r\n" - "#include ""res/interlace.fx""\r\n" - "#include ""res/merge.fx""\r\n" - "#include ""res/fxaa.fx""\r\n" - "#include ""res/cs.fx""\r\n" - "#include ""res/shadeboost.fx""\r\n" - "#include ""res/tfx.cl""\r\0" -END - -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// RCDATA -// - -IDR_TFX_FX RCDATA "res\\tfx.fx" -IDR_CONVERT_FX RCDATA "res\\convert.fx" -IDR_INTERLACE_FX RCDATA "res\\interlace.fx" -IDR_MERGE_FX RCDATA "res\\merge.fx" -IDR_FXAA_FX RCDATA "res\\fxaa.fx" -IDR_CS_FX RCDATA "res\\cs.fx" -IDR_SHADEBOOST_FX RCDATA "res\\shadeboost.fx" -IDR_TFX_CL RCDATA "res\\tfx.cl" - -///////////////////////////////////////////////////////////////////////////// -// -// Bitmap -// - -IDB_LOGO9 BITMAP "res\\logo9.bmp" -IDB_LOGO10 BITMAP "res\\logo10.bmp" -IDB_LOGOGL BITMAP "res\\logo-ogl.bmp" - -///////////////////////////////////////////////////////////////////////////// -// -// Dialog -// - -IDD_HACKS DIALOGEX 0, 0, 161, 
200 -STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU -CAPTION "Hacks Configuration" -FONT 8, "MS Shell Dlg", 400, 0, 0x1 -BEGIN - DEFPUSHBUTTON "OK",IDOK,88,181,66,14 - GROUPBOX "USE AT YOUR OWN RISK!",IDC_STATIC,7,7,147,171,0,WS_EX_TRANSPARENT - CONTROL "Preload Data Frame",IDC_PRELOAD_GS,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,95,80,8 - RTEXT "MSAA:",IDC_MSAA_TEXT,62,20,22,8 - RTEXT "Skipdraw:",IDC_STATIC,52,36,32,8 - EDITTEXT IDC_SKIPDRAWHACKEDIT,88,33,58,14,ES_RIGHT | ES_AUTOHSCROLL - CONTROL "",IDC_SKIPDRAWHACK,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,113,34,11,14 - CONTROL "Alpha",IDC_ALPHAHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,88,64,34,8 - CONTROL "Half-pixel Offset",IDC_OFFSETHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,51,70,8 - COMBOBOX IDC_MSAACB,88,17,58,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - CONTROL "Wild Arms Offset",IDC_WILDHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,64,70,8 - CONTROL "Safe accurate blending",IDC_SAFE_FBMASK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,77,90,8 - CONTROL "Alpha Stencil",IDC_ALPHASTENCIL,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,77,57,8 - CONTROL "Align Sprite",IDC_ALIGN_SPRITE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,88,51,58,8 - RTEXT "TC Offset X:",IDC_STATIC,40,146,44,8 - EDITTEXT IDC_TCOFFSETX2,88,144,58,14,ES_RIGHT | ES_AUTOHSCROLL - CONTROL "",IDC_TCOFFSETX,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,135,145,11,14 - EDITTEXT IDC_TCOFFSETY2,88,162,58,14,ES_RIGHT | ES_AUTOHSCROLL - CONTROL "",IDC_TCOFFSETY,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,135,162,11,14 - RTEXT "TC Offset Y:",IDC_STATIC,36,165,48,8 - COMBOBOX IDC_ROUND_SPRITE,88,109,58,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - RTEXT "Round Sprite:",IDC_STATIC,39,111,45,8 - RTEXT "Sprite:",IDC_STATIC,62,127,22,8 - COMBOBOX 
IDC_SPRITEHACK,88,125,58,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP -END - -IDD_SHADER DIALOGEX 0, 0, 248, 250 -STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU -CAPTION "Shader Configuration" -FONT 8, "MS Shell Dlg", 400, 0, 0x1 -BEGIN - DEFPUSHBUTTON "OK",IDOK,69,231,50,14 - DEFPUSHBUTTON "Cancel",IDCANCEL,126,231,50,14 - CONTROL "Enable Shade Boost",IDC_SHADEBOOST,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,46,90,10 - LTEXT "Saturation",IDC_STATIC,15,64,34,8 - CONTROL "",IDC_SATURATION_SLIDER,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,55,62,165,15 - LTEXT "Brightness",IDC_STATIC,15,89,34,8 - CONTROL "",IDC_BRIGHTNESS_SLIDER,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,55,87,165,15 - LTEXT "Contrast",IDC_STATIC,15,114,29,8 - CONTROL "",IDC_CONTRAST_SLIDER,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,55,111,165,15 - RTEXT "100",IDC_SATURATION_TEXT,220,64,15,8 - RTEXT "100",IDC_BRIGHTNESS_TEXT,220,89,15,8 - RTEXT "100",IDC_CONTRAST_TEXT,220,114,15,8 - CONTROL "Enable FXAA",IDC_FXAA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,188,6,54,10 - CONTROL "Enable External Shader",IDC_SHADER_FX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,156,90,10 - LTEXT "External Shader",IDC_SHADER_FX_TEXT,15,171,75,8 - EDITTEXT IDC_SHADER_FX_EDIT,15,179,170,14,ES_AUTOHSCROLL - PUSHBUTTON "Browse",IDC_SHADER_FX_BUTTON,196,179,36,14 - LTEXT "External Shader Config",IDC_SHADER_FX_CONF_TEXT,15,196,75,8 - PUSHBUTTON "Browse",IDC_SHADER_FX_CONF_BUTTON,196,204,36,14 - EDITTEXT IDC_SHADER_FX_CONF_EDIT,15,204,170,14,ES_AUTOHSCROLL - COMBOBOX IDC_TVSHADER,75,4,76,14,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "TV Shader (F7):",IDC_STATIC,15,6,55,8 - GROUPBOX "Shade-Boost Settings",IDC_STATIC,6,28,236,106,BS_CENTER - GROUPBOX "External Shader Settings",IDC_STATIC,6,138,236,88,BS_CENTER -END - -IDD_CAPTURE DIALOGEX 0, 0, 279, 71 -STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU -CAPTION "Capture 
settings" -FONT 8, "MS Shell Dlg", 400, 0, 0x1 -BEGIN - EDITTEXT IDC_FILENAME,7,7,207,14,ES_AUTOHSCROLL - PUSHBUTTON "Browse...",IDC_BROWSE,222,7,50,14 - COMBOBOX IDC_CODECS,7,27,207,122,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - PUSHBUTTON "Config...",IDC_CONFIGURE,222,26,50,14 - LTEXT "Size:",IDC_STATIC,6,50,16,8 - EDITTEXT IDC_WIDTH,30,47,31,14,ES_RIGHT | ES_AUTOHSCROLL | ES_NUMBER - EDITTEXT IDC_HEIGHT,64,47,31,14,ES_RIGHT | ES_AUTOHSCROLL | ES_NUMBER - PUSHBUTTON "Cancel",IDCANCEL,169,47,50,14 - DEFPUSHBUTTON "OK",IDOK,221,47,50,14 - COMBOBOX IDC_COLORSPACE,102,47,48,32,CBS_DROPDOWNLIST | WS_TABSTOP -END - -IDD_GPUCONFIG DIALOGEX 0, 0, 189, 199 -STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU -CAPTION "Settings..." -FONT 8, "MS Shell Dlg", 400, 0, 0x1 -BEGIN - CONTROL IDB_LOGO9,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44 - LTEXT "Resolution:",IDC_STATIC,7,59,37,8 - COMBOBOX IDC_RESOLUTION,80,57,102,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Renderer:",IDC_STATIC,7,74,34,8 - COMBOBOX IDC_RENDERER,80,72,102,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Texture Filter (Del):",IDC_STATIC,7,90,64,8 - COMBOBOX IDC_FILTER,80,87,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Dithering (End):",IDC_STATIC,7,105,52,8 - COMBOBOX IDC_DITHERING,80,102,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Aspect Ratio (PgDn):",IDC_STATIC,7,120,68,8 - COMBOBOX IDC_ASPECTRATIO,80,117,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Extra Rend. 
Threads:",IDC_STATIC,7,157,70,8 - EDITTEXT IDC_SWTHREADS_EDIT,80,155,35,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,161,11,14 - DEFPUSHBUTTON "OK",IDOK,43,178,50,14 - PUSHBUTTON "Cancel",IDCANCEL,96,178,50,14 - CONTROL IDB_LOGO10,IDC_LOGO11,"Static",SS_BITMAP,7,7,173,42 - LTEXT "Internal Resolution:",IDC_STATIC,7,135,64,8 - COMBOBOX IDC_SCALE,80,132,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,129,157,49,10 -END - -IDD_CONFIG DIALOGEX 0, 0, 243, 373 -STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU -CAPTION "GSdx Settings" -FONT 8, "MS Shell Dlg", 400, 0, 0x1 -BEGIN - CONTROL IDB_LOGO10,IDC_LOGO11,"Static",SS_BITMAP | SS_REALSIZECONTROL,35,6,173,42 - CONTROL IDB_LOGO9,IDC_LOGO9,"Static",SS_BITMAP | SS_REALSIZECONTROL,34,6,175,44 - CONTROL IDB_LOGOGL,IDC_LOGOGL,"Static",SS_BITMAP | SS_REALSIZECONTROL,34,6,175,44 - LTEXT "Adapter:",IDC_STATIC,6,57,30,8 - COMBOBOX IDC_ADAPTER,71,55,166,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Renderer:",IDC_STATIC,6,72,34,8 - COMBOBOX IDC_RENDERER,71,70,166,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "OpenCL Device:",IDC_OPENCL_TEXT,6,102,53,8 - COMBOBOX IDC_OPENCL_DEVICE,71,100,166,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Interlacing (F5):",IDC_STATIC,6,87,52,8 - COMBOBOX IDC_INTERLACE,71,85,166,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - GROUPBOX "Hardware Mode Settings",IDC_STATIC,6,116,231,152,BS_CENTER - CONTROL "Allow 8-Bit Textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,128,90,10 - LTEXT "Internal Resolution:",IDC_UPSCALE_MULTIPLIER_TEXT,22,144,79,8 - COMBOBOX IDC_UPSCALE_MULTIPLIER,105,142,127,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Custom Resolution:",IDC_CUSTOM_TEXT,22,160,79,8 - EDITTEXT 
IDC_RESX_EDIT,105,158,61,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_RESX,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,145,158,11,14 - EDITTEXT IDC_RESY_EDIT,171,158,61,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_RESY,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,221,158,11,14 - LTEXT "Texture Filtering:",IDC_FILTER_TEXT,22,176,79,8 - COMBOBOX IDC_FILTER,105,174,127,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Anisotropic Filtering:",IDC_AFCOMBO_TEXT,22,192,79,8 - COMBOBOX IDC_AFCOMBO,105,190,127,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "CRC Hack Level:",IDC_CRC_LEVEL_TEXT,22,208,79,8 - COMBOBOX IDC_CRC_LEVEL,105,206,127,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - CONTROL "Enable HW Hacks",IDC_HACKS_ENABLED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,223,71,10 - PUSHBUTTON "Configure Hacks",IDC_HACKSBUTTON,105,221,127,14 - CONTROL "Accurate Date",IDC_ACCURATE_DATE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,105,239,67,8 - CONTROL "Hardware Depth",IDC_TC_DEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,239,72,8 - LTEXT "Blending Unit Accuracy:",IDC_ACCURATE_BLEND_UNIT_TEXT,22,252,79,10 - COMBOBOX IDC_ACCURATE_BLEND_UNIT,105,251,127,63,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,239,90,8 - CONTROL "Alpha Correction",IDC_FBA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,105,239,74,8 - CONTROL "Mipmapping",IDC_MIPMAP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,304,90,8 - GROUPBOX "Software Mode Settings",IDC_STATIC,6,275,231,40,BS_CENTER - CONTROL "Edge Anti-aliasing (AA1)",IDC_AA1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,11,287,91,10 - LTEXT "Rendering threads:",IDC_SWTHREADS_TEXT,124,288,80,8 - EDITTEXT IDC_SWTHREADS_EDIT,198,286,34,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | 
UDS_NOTHOUSANDS,221,285,11,14 - PUSHBUTTON "Configure",IDC_SHADEBUTTON,105,323,127,14 - LTEXT "Shader Configuration:",IDC_STATIC,11,326,90,14 - DEFPUSHBUTTON "OK",IDOK,69,353,50,14 - PUSHBUTTON "Cancel",IDCANCEL,125,353,50,14 -END - - -///////////////////////////////////////////////////////////////////////////// -// -// DESIGNINFO -// - -#ifdef APSTUDIO_INVOKED -GUIDELINES DESIGNINFO -BEGIN - IDD_HACKS, DIALOG - BEGIN - LEFTMARGIN, 7 - RIGHTMARGIN, 154 - VERTGUIDE, 14 - VERTGUIDE, 84 - VERTGUIDE, 88 - VERTGUIDE, 146 - TOPMARGIN, 7 - BOTTOMMARGIN, 177 - HORZGUIDE, 51 - HORZGUIDE, 64 - HORZGUIDE, 77 - END - - IDD_SHADER, DIALOG - BEGIN - LEFTMARGIN, 6 - RIGHTMARGIN, 242 - TOPMARGIN, 7 - BOTTOMMARGIN, 244 - END - - IDD_CAPTURE, DIALOG - BEGIN - VERTGUIDE, 6 - VERTGUIDE, 30 - VERTGUIDE, 271 - HORZGUIDE, 54 - END - - IDD_GPUCONFIG, DIALOG - BEGIN - LEFTMARGIN, 7 - RIGHTMARGIN, 182 - VERTGUIDE, 80 - VERTGUIDE, 182 - TOPMARGIN, 7 - BOTTOMMARGIN, 192 - END - - IDD_CONFIG, DIALOG - BEGIN - LEFTMARGIN, 6 - RIGHTMARGIN, 237 - VERTGUIDE, 11 - VERTGUIDE, 22 - VERTGUIDE, 101 - VERTGUIDE, 105 - VERTGUIDE, 232 - TOPMARGIN, 6 - BOTTOMMARGIN, 367 - END -END -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// Version -// - -VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,0,1,9 - PRODUCTVERSION 1,0,1,9 - FILEFLAGSMASK 0x3fL -#ifdef _DEBUG - FILEFLAGS 0x1L -#else - FILEFLAGS 0x0L -#endif - FILEOS 0x4L - FILETYPE 0x2L - FILESUBTYPE 0x0L -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904e4" - BEGIN - VALUE "Comments", "http://guliverkli.sf.net/" - VALUE "CompanyName", "Gabest" - VALUE "FileDescription", "GS plugin for ps2 emulators" - VALUE "FileVersion", "1, 0, 1, 9" - VALUE "InternalName", "GSdx.dll" - VALUE "LegalCopyright", "Copyright (c) 2007-2008 Gabest. All rights reserved." 
- VALUE "OriginalFilename", "GSdx.dll" - VALUE "ProductName", "GSdx" - VALUE "ProductVersion", "1, 0, 1, 9" - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x409, 1252 - END -END - -#endif // English (United States) resources -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. -// -#include "res/tfx.fx" -#include "res/convert.fx" -#include "res/interlace.fx" -#include "res/merge.fx" -#include "res/fxaa.fx" -#include "res/cs.fx" -#include "res/shadeboost.fx" -#include "res/tfx.cl" - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED - diff --git a/plugins/GSdx_legacy/GSdx.vcxproj b/plugins/GSdx_legacy/GSdx.vcxproj deleted file mode 100644 index 24c13826cf..0000000000 --- a/plugins/GSdx_legacy/GSdx.vcxproj +++ /dev/null @@ -1,952 +0,0 @@ - - - - - Debug AVX2 - Win32 - - - Debug AVX2 - x64 - - - Debug AVX - Win32 - - - Debug AVX - x64 - - - Debug - Win32 - - - Debug - x64 - - - Debug SSE4 - Win32 - - - Debug SSE4 - x64 - - - Debug SSSE3 - Win32 - - - Debug SSSE3 - x64 - - - Release AVX2 - Win32 - - - Release AVX2 - x64 - - - Release AVX - Win32 - - - Release AVX - x64 - - - Release - Win32 - - - Release - x64 - - - Release SSE4 - Win32 - - - Release SSE4 - x64 - - - Release SSSE3 - Win32 - - - Release SSSE3 - x64 - - - - GSdx-legacy - {18E42F6F-3A62-41EE-B42F-79366C4F1E95} - GSdx - Win32Proj - - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - 
$(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - true - $(DefaultPlatformToolset) - - - DynamicLibrary - MultiByte - false - $(DefaultPlatformToolset) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <_ProjectFileVersion>10.0.30319.1 - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Debug;%(AdditionalLibraryDirectories) - - - - - Use - - - - - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Release;%(AdditionalLibraryDirectories) - - - - - Use - - - - - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Release;%(AdditionalLibraryDirectories) - - - - - Use - - - - - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Debug;%(AdditionalLibraryDirectories) - - - - - Use - - - - - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Debug;%(AdditionalLibraryDirectories) - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Debug;%(AdditionalLibraryDirectories) - - - - - Use - 
- - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Debug;%(AdditionalLibraryDirectories) - - - - - X64 - - - Use - - - - - - - - - X64 - - - Use - - - - - - - - - X64 - - - Use - - - - - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Release;%(AdditionalLibraryDirectories) - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Release;%(AdditionalLibraryDirectories) - - - - - Use - - - .\GSdx.def - MachineX86 - $(SvnRootDir)\deps\$(Platform)\Release;%(AdditionalLibraryDirectories) - - - - - X64 - - - Use - - - - - - - - - X64 - - - Use - - - - - - - - - X64 - - - Use - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - - - - - - - AssemblyAndSourceCode - AssemblyAndSourceCode - AssemblyAndSourceCode - - - - - - - - - - - - - - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - - - - - - - - - - - - - - - - - - - - AssemblyAndSourceCode - AssemblyAndSourceCode - - - - - - - - - Create - Create - Create - Create - Create - Create - Create - Create - Create - Create - Create - Create - Create - 
Create - Create - Create - Create - Create - Create - Create - Create - Create - Create - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {d6973076-9317-4ef2-a0b8-b7a18ac0713e} - - - {d80d4a75-c385-41bd-ae62-83d2e2b595a7} - false - - - {27f17499-a372-4408-8afa-4f9f4584fbd3} - - - - - - - - - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/GSdx.vcxproj.filters b/plugins/GSdx_legacy/GSdx.vcxproj.filters deleted file mode 100644 index 67d7f9f691..0000000000 --- a/plugins/GSdx_legacy/GSdx.vcxproj.filters +++ /dev/null @@ -1,613 +0,0 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - {6d029896-e5fd-4b46-8576-52d7d90125e6} - - - {d6fcc23b-bc82-4390-8a9a-928910bc4123} - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - 
- - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - 
- - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Resource Files - - - Xbyak - - - Xbyak - - - Xbyak - - - Xbyak - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - - - Resource Files - - - Resource Files - - - Shaders - - - Shaders - - - Shaders - - - Shaders - - - - Shaders - - - Shaders - - - Shaders - - - Shaders - - - Resource Files - - - - - Resource Files - - - - - Resource Files - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/MurmurHash3.cpp b/plugins/GSdx_legacy/MurmurHash3.cpp deleted file mode 100644 index 66c8f08079..0000000000 --- a/plugins/GSdx_legacy/MurmurHash3.cpp +++ /dev/null @@ -1,336 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -// Note - The x86 and x64 versions do _not_ produce the same results, as the -// algorithms are optimized for their respective platforms. You can still -// compile and run any of them on any platform, but your performance with the -// non-native version will be less than optimal. 
- -#include "stdafx.h" -#include "MurmurHash3.h" - -//----------------------------------------------------------------------------- -// Platform-specific functions and macros - -// Microsoft Visual Studio - -#if defined(_MSC_VER) - -#define FORCE_INLINE __forceinline - -#include - -#define ROTL32(x,y) _rotl(x,y) -#define ROTL64(x,y) _rotl64(x,y) - -#define BIG_CONSTANT(x) (x) - -// Other compilers - -#else // defined(_MSC_VER) - -#define FORCE_INLINE inline __attribute__((always_inline)) - -inline uint32_t rotl32 ( uint32_t x, int8_t r ) -{ - return (x << r) | (x >> (32 - r)); -} - -inline uint64_t rotl64 ( uint64_t x, int8_t r ) -{ - return (x << r) | (x >> (64 - r)); -} - -#define ROTL32(x,y) rotl32(x,y) -#define ROTL64(x,y) rotl64(x,y) - -#define BIG_CONSTANT(x) (x##LLU) - -#endif // !defined(_MSC_VER) - -//----------------------------------------------------------------------------- -// Block read - if your platform needs to do endian-swapping or can only -// handle aligned reads, do the conversion here - -FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) -{ - return p[i]; -} - -FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) -{ - return p[i]; -} - -//----------------------------------------------------------------------------- -// Finalization mix - force all bits of a hash block to avalanche - -FORCE_INLINE uint32_t fmix32 ( uint32_t h ) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -//---------- - -FORCE_INLINE uint64_t fmix64 ( uint64_t k ) -{ - k ^= k >> 33; - k *= BIG_CONSTANT(0xff51afd7ed558ccd); - k ^= k >> 33; - k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); - k ^= k >> 33; - - return k; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_32 ( const void * key, int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const 
uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); - - for(int i = -nblocks; i; i++) - { - uint32_t k1 = getblock32(blocks,i); - - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1,13); - h1 = h1*5+0xe6546b64; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*4); - - uint32_t k1 = 0; - - switch(len & 3) - { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0]; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; - - h1 = fmix32(h1); - - *(uint32_t*)out = h1; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_128 ( const void * key, const int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - - const uint32_t c1 = 0x239b961b; - const uint32_t c2 = 0xab0e9789; - const uint32_t c3 = 0x38b34ae5; - const uint32_t c4 = 0xa1e38b93; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); - - for(int i = -nblocks; i; i++) - { - uint32_t k1 = getblock32(blocks,i*4+0); - uint32_t k2 = getblock32(blocks,i*4+1); - uint32_t k3 = getblock32(blocks,i*4+2); - uint32_t k4 = getblock32(blocks,i*4+3); - - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - - h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; - - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; - - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; - - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; - } - - //---------- - // tail - - const uint8_t * 
tail = (const uint8_t*)(data + nblocks*16); - - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch(len & 15) - { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = fmix32(h1); - h2 = fmix32(h2); - h3 = fmix32(h3); - h4 = fmix32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - ((uint32_t*)out)[0] = h1; - ((uint32_t*)out)[1] = h2; - ((uint32_t*)out)[2] = h3; - ((uint32_t*)out)[3] = h4; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x64_128 ( const void * key, const int len, - const uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - - uint64_t h1 = seed; - uint64_t h2 = seed; - - const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); - const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); - - //---------- - // body - - const uint64_t * blocks = (const uint64_t *)(data); - - for(int i = 0; i < nblocks; i++) - { - uint64_t k1 = getblock64(blocks,i*2+0); - uint64_t k2 = getblock64(blocks,i*2+1); - - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - - h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; - - 
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - - uint64_t k1 = 0; - uint64_t k2 = 0; - - switch(len & 15) - { - case 15: k2 ^= ((uint64_t)tail[14]) << 48; - case 14: k2 ^= ((uint64_t)tail[13]) << 40; - case 13: k2 ^= ((uint64_t)tail[12]) << 32; - case 12: k2 ^= ((uint64_t)tail[11]) << 24; - case 11: k2 ^= ((uint64_t)tail[10]) << 16; - case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; - case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; - case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; - case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; - case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; - case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; - case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; - case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; - case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - ((uint64_t*)out)[0] = h1; - ((uint64_t*)out)[1] = h2; -} - -//----------------------------------------------------------------------------- - diff --git a/plugins/GSdx_legacy/MurmurHash3.h b/plugins/GSdx_legacy/MurmurHash3.h deleted file mode 100644 index de12fb71fb..0000000000 --- a/plugins/GSdx_legacy/MurmurHash3.h +++ /dev/null @@ -1,11 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. 
-//----------------------------------------------------------------------------- - -#pragma once - - -void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); -void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); -void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); diff --git a/plugins/GSdx_legacy/boost_spsc_queue.hpp b/plugins/GSdx_legacy/boost_spsc_queue.hpp deleted file mode 100644 index c1104a5de7..0000000000 --- a/plugins/GSdx_legacy/boost_spsc_queue.hpp +++ /dev/null @@ -1,177 +0,0 @@ -// This version is a stripped down version of boost/lockfree/spsc_queue.hpp boost_spsc_queue.hpp -// Rational -// * Performance is better on linux than the standard std::queue -// * Performance in the same on windows -// => 100-200MB of dependency feel rather unfriendly - -// Potential optimization -// * plug condition variable into the queue directly to avoid redundant m_count - -// * Restore boost optimization -// => unlikely or replace it with a % (if size is 2^n) - - -// lock-free single-producer/single-consumer ringbuffer -// this algorithm is implemented in various projects (linux kernel) -// -// Copyright (C) 2009-2013 Tim Blechmann -// -// Distributed under the Boost Software License, Version 1.0. 
(See -// accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// Boost Software License - Version 1.0 - August 17th, 2003 -// -// Permission is hereby granted, free of charge, to any person or organization -// obtaining a copy of the software and accompanying documentation covered by -// this license (the "Software") to use, reproduce, display, distribute, -// execute, and transmit the Software, and to prepare derivative works of the -// Software, and to permit third-parties to whom the Software is furnished to -// do so, all subject to the following: -// -// The copyright notices in the Software and this entire statement, including -// the above license grant, this restriction and the following disclaimer, -// must be included in all copies of the Software, in whole or in part, and -// all derivative works of the Software, unless such copies or derivative -// works are solely in the form of machine-executable object code generated by -// a source language processor. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. 
- - -template -class ringbuffer_base -{ - static const int padding_size = 64 - sizeof(size_t); - - atomic write_index_; - char padding1[padding_size]; /* force read_index and write_index to different cache lines */ - atomic read_index_; - - T *buffer; - - ringbuffer_base(ringbuffer_base const &) = delete; - ringbuffer_base(ringbuffer_base &&) = delete; - const ringbuffer_base& operator=( const ringbuffer_base& ) = delete; - -public: - ringbuffer_base(void): - write_index_(0), read_index_(0) - { - // Use dynamically allocation here with no T object dependency - // Otherwise the ringbuffer_base destructor will call the destructor - // of T which crash if T is a (invalid) shared_ptr. - // - // Note another solution will be to create a char buffer as union of T - buffer = (T*)_aligned_malloc(sizeof(T)*max_size, 32); - } - - ~ringbuffer_base(void) { - // destroy all remaining items - T out; - while (pop(out)) {}; - - _aligned_free(buffer); - } - - - static size_t next_index(size_t arg) - { - size_t ret = arg + 1; -#if 0 - while (unlikely(ret >= max_size)) -#else - while (ret >= max_size) -#endif - ret -= max_size; - return ret; - } - - bool push(T const & t) - { - const size_t write_index = write_index_.load(memory_order_relaxed); // only written from push thread - const size_t next = next_index(write_index); - - if (next == read_index_.load(memory_order_acquire)) - return false; /* ringbuffer is full */ - - new (buffer + write_index) T(t); // copy-construct - - write_index_.store(next, memory_order_release); - - return true; - } - - bool pop (T & ret) - { - const size_t write_index = write_index_.load(memory_order_acquire); - const size_t read_index = read_index_.load(memory_order_relaxed); // only written from pop thread - if (empty(write_index, read_index)) - return false; - - ret = buffer[read_index]; - buffer[read_index].~T(); - - size_t next = next_index(read_index); - read_index_.store(next, memory_order_release); - return true; - } - - template - bool 
consume_one(Functor & f) - { - const size_t write_index = write_index_.load(memory_order_acquire); - const size_t read_index = read_index_.load(memory_order_relaxed); // only written from pop thread - if (empty(write_index, read_index)) - return false; - - f(buffer[read_index]); - buffer[read_index].~T(); - - size_t next = next_index(read_index); - read_index_.store(next, memory_order_release); - return true; - } - -public: - /** reset the ringbuffer - * - * \note Not thread-safe - * */ - void reset(void) - { - write_index_.store(0, memory_order_relaxed); - read_index_.store(0, memory_order_release); - } - - /** Check if the ringbuffer is empty - * - * \return true, if the ringbuffer is empty, false otherwise - * \note Due to the concurrent nature of the ringbuffer the result may be inaccurate. - * */ - bool empty(void) - { - return empty(write_index_.load(memory_order_relaxed), read_index_.load(memory_order_relaxed)); - } - - /** - * \return true, if implementation is lock-free. - * - * */ - bool is_lock_free(void) const - { - return write_index_.is_lock_free() && read_index_.is_lock_free(); - } - -private: - bool empty(size_t write_index, size_t read_index) - { - return write_index == read_index; - } -}; diff --git a/plugins/GSdx_legacy/config.h b/plugins/GSdx_legacy/config.h deleted file mode 100644 index 9527c023c9..0000000000 --- a/plugins/GSdx_legacy/config.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -//#define ENABLE_VTUNE - -#define ENABLE_JIT_RASTERIZER - -#define EXTERNAL_SHADER_LOADING 1 - -//#define ENABLE_DYNAMIC_CRC_HACK -#define DYNA_DLL_PATH "c:/dev/pcsx2/trunk/tools/dynacrchack/DynaCrcHack.dll" - -//#define DISABLE_HW_TEXTURE_CACHE // Slow but fixes a lot of bugs - -//#define DISABLE_BITMASKING - -//#define DISABLE_COLCLAMP - -//#define DISABLE_DATE - - -#if defined(_DEBUG) || defined(_DEVEL) -#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states. -//#define ENABLE_OGL_DEBUG_FENCE -#endif -//#define ENABLE_OGL_DEBUG_MEM_BW // compute the quantity of data transfered (debug purpose) - -#if defined(__linux__) && !(defined(_DEBUG) || defined(_DEVEL)) -#define DISABLE_PERF_MON // Burn cycle for nothing in release mode -#endif - -#ifdef _WIN32 -//#define ENABLE_OPENCL -#endif diff --git a/plugins/GSdx_legacy/cpp_check.sh b/plugins/GSdx_legacy/cpp_check.sh deleted file mode 100644 index a9e8b321d5..0000000000 --- a/plugins/GSdx_legacy/cpp_check.sh +++ /dev/null @@ -1 +0,0 @@ -cppcheck --enable=warning,style,missingInclude -j 16 --platform=unix32 -D__linux__ -UENABLE_VTUNE -U_WINDOWS -U_M_AMD64 -U_MSC_VER . 
|& tee cpp_check.log diff --git a/plugins/GSdx_legacy/docs/TextureCache.odg b/plugins/GSdx_legacy/docs/TextureCache.odg deleted file mode 100644 index 1c7203d897..0000000000 Binary files a/plugins/GSdx_legacy/docs/TextureCache.odg and /dev/null differ diff --git a/plugins/GSdx_legacy/linux_replay.cpp b/plugins/GSdx_legacy/linux_replay.cpp deleted file mode 100644 index 5aae1e6c8a..0000000000 --- a/plugins/GSdx_legacy/linux_replay.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2011-2012 Hainaut gregory - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include - -static void* handle; - -void help() -{ - fprintf(stderr, "Loader gs file\n"); - fprintf(stderr, "ARG1 GSdx plugin\n"); - fprintf(stderr, "ARG2 .gs file\n"); - fprintf(stderr, "ARG3 Ini directory\n"); - if (handle) { - dlclose(handle); - } - exit(1); -} - -char* read_env(const char* var) { - char* v = getenv(var); - if (!v) { - fprintf(stderr, "Failed to get %s\n", var); - help(); - } - return v; -} - -int main ( int argc, char *argv[] ) -{ - if (argc < 1) help(); - - char* plugin; - char* gs; - if (argc > 2) { - plugin = argv[1]; - gs = argv[2]; - } else { - plugin = read_env("GSDUMP_SO"); - gs = argv[1]; - } - - handle = dlopen(plugin, RTLD_LAZY|RTLD_GLOBAL); - if (handle == NULL) { - fprintf(stderr, "Failed to dlopen plugin %s\n", plugin); - help(); - } - - __attribute__((stdcall)) void (*GSsetSettingsDir_ptr)(const char*); - __attribute__((stdcall)) void (*GSReplay_ptr)(char*, int); - - *(void**)(&GSsetSettingsDir_ptr) = dlsym(handle, "GSsetSettingsDir"); - *(void**)(&GSReplay_ptr) = dlsym(handle, "GSReplay"); - - if (argc == 2) { - char *ini = read_env("GSDUMP_CONF"); - - GSsetSettingsDir_ptr(ini); - - } else if (argc == 4) { - (void)GSsetSettingsDir_ptr(argv[3]); - - } else if ( argc == 3) { -#ifdef XDG_STD - char *val = read_env("HOME"); - - std::string ini_dir(val); - ini_dir += "/.config/pcsx2/inis"; - - GSsetSettingsDir_ptr(ini_dir.c_str()); -#else - fprintf(stderr, "default ini dir only supported on XDG\n"); - help(); -#endif - } - - GSReplay_ptr(gs, 12); - - if (handle) { - dlclose(handle); - } -} diff --git a/plugins/GSdx_legacy/res/convert.fx b/plugins/GSdx_legacy/res/convert.fx deleted file mode 100644 index c482722391..0000000000 --- a/plugins/GSdx_legacy/res/convert.fx +++ /dev/null @@ -1,324 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency -#if SHADER_MODEL >= 0x400 - -struct VS_INPUT -{ - float4 p : POSITION; - float2 t 
: TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -Texture2D Texture; -SamplerState TextureSampler; - -float4 sample_c(float2 uv) -{ - return Texture.Sample(TextureSampler, uv); -} - -struct PS_INPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -struct PS_OUTPUT -{ - float4 c : SV_Target0; -}; - -#elif SHADER_MODEL <= 0x300 - -struct VS_INPUT -{ - float4 p : POSITION; - float2 t : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float4 p : POSITION; - float2 t : TEXCOORD0; -}; - -struct PS_INPUT -{ -#if SHADER_MODEL < 0x300 - float4 p : TEXCOORD1; -#else - float4 p : VPOS; -#endif - float2 t : TEXCOORD0; -}; - -struct PS_OUTPUT -{ - float4 c : COLOR; -}; - -sampler Texture : register(s0); - -float4 sample_c(float2 uv) -{ - return tex2D(Texture, uv); -} - -#endif - -VS_OUTPUT vs_main(VS_INPUT input) -{ - VS_OUTPUT output; - - output.p = input.p; - output.t = input.t; - - return output; -} - -PS_OUTPUT ps_main0(PS_INPUT input) -{ - PS_OUTPUT output; - - output.c = sample_c(input.t); - - return output; -} - -PS_OUTPUT ps_main7(PS_INPUT input) -{ - PS_OUTPUT output; - - float4 c = sample_c(input.t); - - c.a = dot(c.rgb, float3(0.299, 0.587, 0.114)); - - output.c = c; - - return output; -} - -float4 ps_crt(PS_INPUT input, int i) -{ - float4 mask[4] = - { - float4(1, 0, 0, 0), - float4(0, 1, 0, 0), - float4(0, 0, 1, 0), - float4(1, 1, 1, 0) - }; - - return sample_c(input.t) * saturate(mask[i] + 0.5f); -} - -float4 ps_scanlines(PS_INPUT input, int i) -{ - float4 mask[2] = - { - float4(1, 1, 1, 0), - float4(0, 0, 0, 0) - }; - - return sample_c(input.t) * saturate(mask[i] + 0.5f); -} - -#if SHADER_MODEL >= 0x400 - -uint ps_main1(PS_INPUT input) : SV_Target0 -{ - float4 c = sample_c(input.t); - - c.a *= 256.0f / 127; // hm, 0.5 won't give us 1.0 if we just multiply with 2 - - uint4 i = c * float4(0x001f, 0x03e0, 0x7c00, 0x8000); - - return (i.x & 0x001f) | (i.y & 0x03e0) | (i.z & 0x7c00) | (i.w & 0x8000); -} - -PS_OUTPUT 
ps_main2(PS_INPUT input) -{ - PS_OUTPUT output; - - clip(sample_c(input.t).a - 127.5f / 255); // >= 0x80 pass - - output.c = 0; - - return output; -} - -PS_OUTPUT ps_main3(PS_INPUT input) -{ - PS_OUTPUT output; - - clip(127.5f / 255 - sample_c(input.t).a); // < 0x80 pass (== 0x80 should not pass) - - output.c = 0; - - return output; -} - -PS_OUTPUT ps_main4(PS_INPUT input) -{ - PS_OUTPUT output; - - output.c = fmod(sample_c(input.t) * 255 + 0.5f, 256) / 255; - - return output; -} - -PS_OUTPUT ps_main5(PS_INPUT input) // scanlines -{ - PS_OUTPUT output; - - uint4 p = (uint4)input.p; - - output.c = ps_scanlines(input, p.y % 2); - - return output; -} - -PS_OUTPUT ps_main6(PS_INPUT input) // diagonal -{ - PS_OUTPUT output; - - uint4 p = (uint4)input.p; - - output.c = ps_crt(input, (p.x + (p.y % 3)) % 3); - - return output; -} - -PS_OUTPUT ps_main8(PS_INPUT input) // triangular -{ - PS_OUTPUT output; - - uint4 p = (uint4)input.p; - - // output.c = ps_crt(input, ((p.x + (p.y & 1) * 3) >> 1) % 3); - output.c = ps_crt(input, ((p.x + ((p.y >> 1) & 1) * 3) >> 1) % 3); - - return output; -} - -static const float PI = 3.14159265359f; -PS_OUTPUT ps_main9(PS_INPUT input) // triangular -{ - PS_OUTPUT output; - - float2 texdim, halfpixel; - Texture.GetDimensions(texdim.x, texdim.y); - if (ddy(input.t.y) * texdim.y > 0.5) - output.c = sample_c(input.t); - else - output.c = (0.9 - 0.4 * cos(2 * PI * input.t.y * texdim.y)) * sample_c(float2(input.t.x, (floor(input.t.y * texdim.y) + 0.5) / texdim.y)); - - return output; -} - -#elif SHADER_MODEL <= 0x300 - -PS_OUTPUT ps_main1(PS_INPUT input) -{ - PS_OUTPUT output; - - float4 c = sample_c(input.t); - - c.a *= 128.0f / 255; // *= 0.5f is no good here, need to do this in order to get 0x80 for 1.0f (instead of 0x7f) - - output.c = c; - - return output; -} - -PS_OUTPUT ps_main2(PS_INPUT input) -{ - PS_OUTPUT output; - - clip(sample_c(input.t).a - 255.0f / 255); // >= 0x80 pass - - output.c = 0; - - return output; -} - -PS_OUTPUT 
ps_main3(PS_INPUT input) -{ - PS_OUTPUT output; - - clip(254.95f / 255 - sample_c(input.t).a); // < 0x80 pass (== 0x80 should not pass) - - output.c = 0; - - return output; -} - -PS_OUTPUT ps_main4(PS_INPUT input) -{ - PS_OUTPUT output; - - output.c = 1; - - return output; -} - -PS_OUTPUT ps_main5(PS_INPUT input) // scanlines -{ - PS_OUTPUT output; - - int4 p = (int4)input.p; - - output.c = ps_scanlines(input, p.y % 2); - - return output; -} - -PS_OUTPUT ps_main6(PS_INPUT input) // diagonal -{ - PS_OUTPUT output; - - int4 p = (int4)input.p; - - output.c = ps_crt(input, (p.x + (p.y % 3)) % 3); - - return output; -} - -PS_OUTPUT ps_main8(PS_INPUT input) // triangular -{ - PS_OUTPUT output; - - int4 p = (int4)input.p; - - // output.c = ps_crt(input, ((p.x + (p.y % 2) * 3) / 2) % 3); - output.c = ps_crt(input, ((p.x + ((p.y / 2) % 2) * 3) / 2) % 3); - - return output; -} - -static const float PI = 3.14159265359f; -PS_OUTPUT ps_main9(PS_INPUT input) // triangular -{ - PS_OUTPUT output; - - // Needs DX9 conversion - /*float2 texdim, halfpixel; - Texture.GetDimensions(texdim.x, texdim.y); - if (ddy(input.t.y) * texdim.y > 0.5) - output.c = sample_c(input.t); - else - output.c = (0.5 - 0.5 * cos(2 * PI * input.t.y * texdim.y)) * sample_c(float2(input.t.x, (floor(input.t.y * texdim.y) + 0.5) / texdim.y)); -*/ - - // replacement shader - int4 p = (int4)input.p; - output.c = ps_crt(input, ((p.x + ((p.y / 2) % 2) * 3) / 2) % 3); - - return output; -} - -#endif -#endif diff --git a/plugins/GSdx_legacy/res/cs.fx b/plugins/GSdx_legacy/res/cs.fx deleted file mode 100644 index c84211ba95..0000000000 --- a/plugins/GSdx_legacy/res/cs.fx +++ /dev/null @@ -1,387 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency - -#ifndef VS_TME -#define VS_TME 1 -#define VS_FST 1 -#endif - -#ifndef GS_IIP -#define GS_IIP 0 -#define GS_PRIM 2 -#endif - -#ifndef PS_BATCH_SIZE -#define PS_BATCH_SIZE 2048 -#define PS_FPSM PSM_PSMCT32 -#define PS_ZPSM PSM_PSMZ16 
-#endif - -#define PSM_PSMCT32 0 -#define PSM_PSMCT24 1 -#define PSM_PSMCT16 2 -#define PSM_PSMCT16S 10 -#define PSM_PSMT8 19 -#define PSM_PSMT4 20 -#define PSM_PSMT8H 27 -#define PSM_PSMT4HL 36 -#define PSM_PSMT4HH 44 -#define PSM_PSMZ32 48 -#define PSM_PSMZ24 49 -#define PSM_PSMZ16 50 -#define PSM_PSMZ16S 58 - -struct VS_INPUT -{ - float2 st : TEXCOORD0; - float4 c : COLOR0; - float q : TEXCOORD1; - uint2 p : POSITION0; - uint z : POSITION1; - uint2 uv : TEXCOORD2; - float4 f : COLOR1; -}; - -struct VS_OUTPUT -{ - float4 p : SV_Position; - float2 z : TEXCOORD0; - float4 t : TEXCOORD1; - float4 c : COLOR0; -}; - -struct GS_OUTPUT -{ - float4 p : SV_Position; - float2 z : TEXCOORD0; - float4 t : TEXCOORD1; - float4 c : COLOR0; - uint id : SV_PrimitiveID; -}; - -cbuffer VSConstantBuffer : register(c0) -{ - float4 VertexScale; - float4 VertexOffset; -}; - -cbuffer PSConstantBuffer : register(c0) -{ - uint2 WriteMask; -}; - -struct FragmentLinkItem -{ - uint c, z, id, next; -}; - -RWByteAddressBuffer VideoMemory : register(u0); -RWStructuredBuffer FragmentLinkBuffer : register(u1); -RWByteAddressBuffer StartOffsetBuffer : register(u2); -//RWTexture2D VideoMemory : register(u2); // 8192 * 512 R8_UINT - -Buffer FZRowOffset : register(t0); -Buffer FZColOffset : register(t1); -Texture2D Palette : register(t2); -Texture2D Texture : register(t3); - -VS_OUTPUT vs_main(VS_INPUT input) -{ - VS_OUTPUT output; - - output.p = float4(input.p, 0.0f, 0.0f) * VertexScale - VertexOffset; - output.z = float2(input.z & 0xffff, input.z >> 16); // TODO: min(input.z, 0xffffff00) ? 
- - if(VS_TME) - { - if(VS_FST) - { - output.t.xy = input.uv; - output.t.w = 1.0f; - } - else - { - output.t.xy = input.st; - output.t.w = input.q; - } - } - else - { - output.t.xy = 0; - output.t.w = 1.0f; - } - - output.c = input.c; - output.t.z = input.f.r; - - return output; -} - -#if GS_PRIM == 0 - -[maxvertexcount(1)] -void gs_main(point VS_OUTPUT input[1], inout PointStream stream, uint id : SV_PrimitiveID) -{ - GS_OUTPUT output; - - output.p = input[0].p; - output.z = input[0].z; - output.t = input[0].t; - output.c = input[0].c; - output.id = id; - - stream.Append(output); -} - -#elif GS_PRIM == 1 - -[maxvertexcount(2)] -void gs_main(line VS_OUTPUT input[2], inout LineStream stream, uint id : SV_PrimitiveID) -{ - [unroll] - for(int i = 0; i < 2; i++) - { - GS_OUTPUT output; - - output.p = input[i].p; - output.z = input[i].z; - output.t = input[i].t; - output.c = input[i].c; - output.id = id; - -#if GS_IIP == 0 - if(i != 1) output.c = input[1].c; -#endif - - stream.Append(output); - } -} - -#elif GS_PRIM == 2 - -[maxvertexcount(3)] -void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream stream, uint id : SV_PrimitiveID) -{ - [unroll] - for(int i = 0; i < 3; i++) - { - GS_OUTPUT output; - - output.p = input[i].p; - output.z = input[i].z; - output.t = input[i].t; - output.c = input[i].c; - output.id = id; - -#if GS_IIP == 0 - if(i != 2) output.c = input[2].c; -#endif - - stream.Append(output); - } -} - -#elif GS_PRIM == 3 - -[maxvertexcount(4)] -void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream, uint id : SV_PrimitiveID) -{ - GS_OUTPUT lt, rb, lb, rt; - - lt.p = input[0].p; - lt.z = input[1].z; - lt.t.xy = input[0].t.xy; - lt.t.zw = input[1].t.zw; - lt.c = input[0].c; - lt.id = id; - -#if GS_IIP == 0 - lt.c = input[1].c; -#endif - - rb.p = input[1].p; - rb.z = input[1].z; - rb.t = input[1].t; - rb.c = input[1].c; - rb.id = id; - - lb = lt; - lb.p.y = rb.p.y; - lb.t.y = rb.t.y; - - rt = rb; - rt.p.y = lt.p.y; - rt.t.y = lt.t.y; - - 
stream.Append(lt); - stream.Append(lb); - stream.Append(rt); - stream.Append(rb); -} - -#endif - -uint CompressColor32(float4 f) -{ - uint4 c = (uint4)(f * 0xff) << uint4(0, 8, 16, 24); - - return c.r | c.g | c.b | c.a; -} - -uint DecompressColor16(uint c) -{ - uint r = (c & 0x001f) << 3; - uint g = (c & 0x03e0) << 6; - uint b = (c & 0x7c00) << 9; - uint a = (c & 0x8000) << 15; - - return r | g | b | a; -} - -uint ReadPixel(uint addr) -{ - return VideoMemory.Load(addr) >> ((addr & 2) << 3); -} - -void WritePixel(uint addr, uint value, uint psm) -{ - uint tmp; - - switch(psm) - { - case PSM_PSMCT32: - case PSM_PSMZ32: - case PSM_PSMCT24: - case PSM_PSMZ24: - VideoMemory.Store(addr, value); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - tmp = (addr & 2) << 3; - value = ((value << tmp) ^ VideoMemory.Load(addr)) & (0x0000ffff << tmp); - VideoMemory.InterlockedXor(addr, value, tmp); - break; - } -} - -void ps_main0(GS_OUTPUT input) -{ - uint x = (uint)input.p.x; - uint y = (uint)input.p.y; - - uint tail = FragmentLinkBuffer.IncrementCounter(); - - uint index = (y << 11) + x; - uint next = 0; - - StartOffsetBuffer.InterlockedExchange(index * 4, tail, next); - - FragmentLinkItem item; - - // TODO: preprocess color (tfx, alpha test), z-test - - item.c = CompressColor32(input.c); - item.z = (uint)(input.z.y * 0x10000 + input.z.x); - item.id = input.id; - item.next = next; - - FragmentLinkBuffer[tail] = item; -} - -void ps_main1(GS_OUTPUT input) -{ - uint2 pos = (uint2)input.p.xy; - - // sort fragments - - uint StartOffsetIndex = (pos.y << 11) + pos.x; - - int index[PS_BATCH_SIZE]; - int count = 0; - - uint next = StartOffsetBuffer.Load(StartOffsetIndex * 4); - - StartOffsetBuffer.Store(StartOffsetIndex * 4, 0); - - [allow_uav_condition] - while(next != 0) - { - index[count++] = next; - - next = FragmentLinkBuffer[next].next; - } - - int N2 = 1 << (int)(ceil(log2(count))); - - [allow_uav_condition] - for(int i = count; i < N2; i++) 
- { - index[i] = 0; - } - - [allow_uav_condition] - for(int k = 2; k <= N2; k = 2 * k) - { - [allow_uav_condition] - for(int j = k >> 1; j > 0 ; j = j >> 1) - { - [allow_uav_condition] - for(int i = 0; i < N2; i++) - { - uint i_id = FragmentLinkBuffer[index[i]].id; - - int ixj = i ^ j; - - if(ixj > i) - { - uint ixj_id = FragmentLinkBuffer[index[ixj]].id; - - if((i & k) == 0 && i_id > ixj_id) - { - int temp = index[i]; - index[i] = index[ixj]; - index[ixj] = temp; - } - - if((i & k) != 0 && i_id < ixj_id) - { - int temp = index[i]; - index[i] = index[ixj]; - index[ixj] = temp; - } - } - } - } - } - - uint2 addr = (uint2)(FZRowOffset[pos.y] + FZColOffset[pos.x]) << 1; - - uint dc = ReadPixel(addr.x); - uint dz = ReadPixel(addr.y); - - uint sc = dc; - uint sz = dz; - - [allow_uav_condition] - while(--count >= 0) - { - FragmentLinkItem f = FragmentLinkBuffer[index[count]]; - - // TODO - - if(sz < f.z) - { - sc = f.c; - sz = f.z; - } - } - - uint c = sc; // (dc & ~WriteMask.x) | (sc & WriteMask.x); - uint z = 0;//sz; //(dz & ~WriteMask.y) | (sz & WriteMask.y); - - WritePixel(addr.x, c, PS_FPSM); - WritePixel(addr.y, z, PS_ZPSM); -} - -#endif diff --git a/plugins/GSdx_legacy/res/fxaa.fx b/plugins/GSdx_legacy/res/fxaa.fx deleted file mode 100644 index d38d54623b..0000000000 --- a/plugins/GSdx_legacy/res/fxaa.fx +++ /dev/null @@ -1,588 +0,0 @@ -#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) - -#ifndef FXAA_GLSL_130 - #define FXAA_GLSL_130 0 -#endif - -#define UHQ_FXAA 1 //High Quality Fast Approximate Anti Aliasing. Adapted for GSdx from Timothy Lottes FXAA 3.11. -#define FxaaSubpixMax 0.0 //[0.00 to 1.00] Amount of subpixel aliasing removal. 0.00: Edge only antialiasing (no blurring) -#define FxaaEarlyExit 1 //[0 or 1] Use Fxaa early exit pathing. When disabled, the entire scene is antialiased(FSAA). 0 is off, 1 is on. 
- -/*------------------------------------------------------------------------------ - [GLOBALS|FUNCTIONS] -------------------------------------------------------------------------------*/ -#if (FXAA_GLSL_130 == 1) - -struct vertex_basic -{ - vec4 p; - vec2 t; -}; - -layout(binding = 0) uniform sampler2D TextureSampler; - -in SHADER -{ - vec4 p; - vec2 t; -} PSin; - -layout(location = 0) out vec4 SV_Target0; - -#else - -#if (SHADER_MODEL >= 0x400) -Texture2D Texture : register(t0); -SamplerState TextureSampler : register(s0); -#else -texture2D Texture : register(t0); -sampler2D TextureSampler : register(s0); -#define SamplerState sampler2D -#endif - -cbuffer cb0 -{ - float4 _rcpFrame : register(c0); -}; - -struct VS_INPUT -{ - float4 p : POSITION; - float2 t : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - #if (SHADER_MODEL >= 0x400) - float4 p : SV_Position; - #else - float4 p : TEXCOORD1; - #endif - float2 t : TEXCOORD0; -}; - -struct PS_OUTPUT -{ - #if (SHADER_MODEL >= 0x400) - float4 c : SV_Target0; - #else - float4 c : COLOR0; - #endif -}; - -#endif - -/*------------------------------------------------------------------------------ - [FXAA CODE SECTION] -------------------------------------------------------------------------------*/ - -#if (SHADER_MODEL >= 0x500) -#define FXAA_HLSL_5 1 -#define FXAA_GATHER4_ALPHA 1 -#elif (SHADER_MODEL >= 0x400) -#define FXAA_HLSL_4 1 -#define FXAA_GATHER4_ALPHA 0 -#elif (FXAA_GLSL_130 == 1) -#define FXAA_GATHER4_ALPHA 1 -#else -#define FXAA_HLSL_3 1 -#define FXAA_GATHER4_ALPHA 0 -#endif - -#if (FXAA_HLSL_5 == 1) -struct FxaaTex { SamplerState smpl; Texture2D tex; }; -#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) -#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) -#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p) -#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o) -#define FxaaDiscard clip(-1) -#define FxaaSat(x) saturate(x) - -#elif (FXAA_HLSL_4 == 1) -struct FxaaTex { 
SamplerState smpl; Texture2D tex; }; -#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) -#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) -#define FxaaDiscard clip(-1) -#define FxaaSat(x) saturate(x) - -#elif (FXAA_HLSL_3 == 1) -#define FxaaTex sampler2D -#define int2 float2 -#define FxaaSat(x) saturate(x) -#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0)) -#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0)) - -#elif (FXAA_GLSL_130 == 1) - -#define int2 ivec2 -#define float2 vec2 -#define float3 vec3 -#define float4 vec4 -#define FxaaDiscard discard -#define FxaaSat(x) clamp(x, 0.0, 1.0) -#define FxaaTex sampler2D -#define FxaaTexTop(t, p) textureLod(t, p, 0.0) -#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o) -#if (FXAA_GATHER4_ALPHA == 1) -// use #extension GL_ARB_gpu_shader5 : enable -#define FxaaTexAlpha4(t, p) textureGather(t, p, 3) -#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) -#endif - -#endif - -#define FxaaEdgeThreshold 0.063 -#define FxaaEdgeThresholdMin 0.00 -#define FXAA_QUALITY__P0 1.0 -#define FXAA_QUALITY__P1 1.5 -#define FXAA_QUALITY__P2 2.0 -#define FXAA_QUALITY__P3 2.0 -#define FXAA_QUALITY__P4 2.0 -#define FXAA_QUALITY__P5 2.0 -#define FXAA_QUALITY__P6 2.0 -#define FXAA_QUALITY__P7 2.0 -#define FXAA_QUALITY__P8 2.0 -#define FXAA_QUALITY__P9 2.0 -#define FXAA_QUALITY__P10 4.0 -#define FXAA_QUALITY__P11 8.0 -#define FXAA_QUALITY__P12 8.0 - -/*------------------------------------------------------------------------------ - [GAMMA PREPASS CODE SECTION] -------------------------------------------------------------------------------*/ -float RGBLuminance(float3 color) -{ - const float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750); - return dot(color.rgb, lumCoeff); -} - -#if (FXAA_GLSL_130 == 0) -#define PixelSize float2(_rcpFrame.x, _rcpFrame.y) -#endif - - -float3 RGBGammaToLinear(float3 color, float gamma) -{ - color = FxaaSat(color); - color.r = (color.r 
<= 0.0404482362771082) ? - color.r / 12.92 : pow((color.r + 0.055) / 1.055, gamma); - color.g = (color.g <= 0.0404482362771082) ? - color.g / 12.92 : pow((color.g + 0.055) / 1.055, gamma); - color.b = (color.b <= 0.0404482362771082) ? - color.b / 12.92 : pow((color.b + 0.055) / 1.055, gamma); - - return color; -} - -float3 LinearToRGBGamma(float3 color, float gamma) -{ - color = FxaaSat(color); - color.r = (color.r <= 0.00313066844250063) ? - color.r * 12.92 : 1.055 * pow(color.r, 1.0 / gamma) - 0.055; - color.g = (color.g <= 0.00313066844250063) ? - color.g * 12.92 : 1.055 * pow(color.g, 1.0 / gamma) - 0.055; - color.b = (color.b <= 0.00313066844250063) ? - color.b * 12.92 : 1.055 * pow(color.b, 1.0 / gamma) - 0.055; - - return color; -} - -float4 PreGammaPass(float4 color, float2 uv0) -{ - #if (SHADER_MODEL >= 0x400) - color = Texture.Sample(TextureSampler, uv0); - #elif (FXAA_GLSL_130 == 1) - color = texture(TextureSampler, uv0); - #else - color = tex2D(TextureSampler, uv0); - #endif - - const float GammaConst = 2.233; - color.rgb = RGBGammaToLinear(color.rgb, GammaConst); - color.rgb = LinearToRGBGamma(color.rgb, GammaConst); - color.a = RGBLuminance(color.rgb); - - return color; -} - - -/*------------------------------------------------------------------------------ - [FXAA CODE SECTION] -------------------------------------------------------------------------------*/ - -float FxaaLuma(float4 rgba) -{ - rgba.w = RGBLuminance(rgba.xyz); - return rgba.w; -} - -float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaSubpix, float fxaaEdgeThreshold, float fxaaEdgeThresholdMin) -{ - float2 posM; - posM.x = pos.x; - posM.y = pos.y; - - #if (FXAA_GATHER4_ALPHA == 1) - float4 rgbyM = FxaaTexTop(tex, posM); - float4 luma4A = FxaaTexAlpha4(tex, posM); - float4 luma4B = FxaaTexOffAlpha4(tex, posM, int2(-1, -1)); - rgbyM.w = RGBLuminance(rgbyM.xyz); - - #define lumaM rgbyM.w - #define lumaE luma4A.z - #define lumaS luma4A.x - #define lumaSE luma4A.y 
- #define lumaNW luma4B.w - #define lumaN luma4B.z - #define lumaW luma4B.x - - #else - float4 rgbyM = FxaaTexTop(tex, posM); - rgbyM.w = RGBLuminance(rgbyM.xyz); - #define lumaM rgbyM.w - - float lumaS = FxaaLuma(FxaaTexOff(tex, posM, int2( 0, 1), fxaaRcpFrame.xy)); - float lumaE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 0), fxaaRcpFrame.xy)); - float lumaN = FxaaLuma(FxaaTexOff(tex, posM, int2( 0,-1), fxaaRcpFrame.xy)); - float lumaW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 0), fxaaRcpFrame.xy)); - #endif - - float maxSM = max(lumaS, lumaM); - float minSM = min(lumaS, lumaM); - float maxESM = max(lumaE, maxSM); - float minESM = min(lumaE, minSM); - float maxWN = max(lumaN, lumaW); - float minWN = min(lumaN, lumaW); - - float rangeMax = max(maxWN, maxESM); - float rangeMin = min(minWN, minESM); - float range = rangeMax - rangeMin; - float rangeMaxScaled = rangeMax * fxaaEdgeThreshold; - float rangeMaxClamped = max(fxaaEdgeThresholdMin, rangeMaxScaled); - - bool earlyExit = range < rangeMaxClamped; - #if (FxaaEarlyExit == 1) - if(earlyExit) { return rgbyM; } - #endif - - #if (FXAA_GATHER4_ALPHA == 0) - float lumaNW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1,-1), fxaaRcpFrame.xy)); - float lumaSE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 1), fxaaRcpFrame.xy)); - float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy)); - float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy)); - #else - float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy)); - float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy)); - #endif - - float lumaNS = lumaN + lumaS; - float lumaWE = lumaW + lumaE; - float subpixRcpRange = 1.0/range; - float subpixNSWE = lumaNS + lumaWE; - float edgeHorz1 = (-2.0 * lumaM) + lumaNS; - float edgeVert1 = (-2.0 * lumaM) + lumaWE; - float lumaNESE = lumaNE + lumaSE; - float lumaNWNE = lumaNW + lumaNE; - float edgeHorz2 = (-2.0 * lumaE) + lumaNESE; - float edgeVert2 = (-2.0 * lumaN) 
+ lumaNWNE; - - float lumaNWSW = lumaNW + lumaSW; - float lumaSWSE = lumaSW + lumaSE; - float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2); - float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2); - float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW; - float edgeVert3 = (-2.0 * lumaS) + lumaSWSE; - float edgeHorz = abs(edgeHorz3) + edgeHorz4; - float edgeVert = abs(edgeVert3) + edgeVert4; - - float subpixNWSWNESE = lumaNWSW + lumaNESE; - float lengthSign = fxaaRcpFrame.x; - bool horzSpan = edgeHorz >= edgeVert; - float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE; - if(!horzSpan) lumaN = lumaW; - if(!horzSpan) lumaS = lumaE; - if(horzSpan) lengthSign = fxaaRcpFrame.y; - float subpixB = (subpixA * (1.0/12.0)) - lumaM; - - float gradientN = lumaN - lumaM; - float gradientS = lumaS - lumaM; - float lumaNN = lumaN + lumaM; - float lumaSS = lumaS + lumaM; - bool pairN = abs(gradientN) >= abs(gradientS); - float gradient = max(abs(gradientN), abs(gradientS)); - if(pairN) lengthSign = -lengthSign; - float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange); - - float2 posB; - posB.x = posM.x; - posB.y = posM.y; - float2 offNP; - offNP.x = (!horzSpan) ? 0.0 : fxaaRcpFrame.x; - offNP.y = ( horzSpan) ? 
0.0 : fxaaRcpFrame.y; - if(!horzSpan) posB.x += lengthSign * 0.5; - if( horzSpan) posB.y += lengthSign * 0.5; - - float2 posN; - posN.x = posB.x - offNP.x * FXAA_QUALITY__P0; - posN.y = posB.y - offNP.y * FXAA_QUALITY__P0; - float2 posP; - posP.x = posB.x + offNP.x * FXAA_QUALITY__P0; - posP.y = posB.y + offNP.y * FXAA_QUALITY__P0; - float subpixD = ((-2.0)*subpixC) + 3.0; - float lumaEndN = FxaaLuma(FxaaTexTop(tex, posN)); - float subpixE = subpixC * subpixC; - float lumaEndP = FxaaLuma(FxaaTexTop(tex, posP)); - - if(!pairN) lumaNN = lumaSS; - float gradientScaled = gradient * 1.0/4.0; - float lumaMM = lumaM - lumaNN * 0.5; - float subpixF = subpixD * subpixE; - bool lumaMLTZero = lumaMM < 0.0; - lumaEndN -= lumaNN * 0.5; - lumaEndP -= lumaNN * 0.5; - bool doneN = abs(lumaEndN) >= gradientScaled; - bool doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1; - bool doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x 
-= offNP.x * FXAA_QUALITY__P3; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = 
FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10; - 
doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11; - - if(doneNP) { - if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); - if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); - if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; - if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; - doneN = abs(lumaEndN) >= gradientScaled; - doneP = abs(lumaEndP) >= gradientScaled; - if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12; - if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12; - doneNP = (!doneN) || (!doneP); - if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12; - if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12; - }}}}}}}}}}} - - float dstN = posM.x - posN.x; - float dstP = posP.x - posM.x; - if(!horzSpan) dstN = posM.y - posN.y; - if(!horzSpan) dstP = posP.y - posM.y; - - bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero; - float spanLength = (dstP + dstN); - bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero; - float spanLengthRcp = 1.0/spanLength; - - bool directionN = dstN < dstP; - float dst = min(dstN, dstP); - bool goodSpan = directionN ? goodSpanN : goodSpanP; - float subpixG = subpixF * subpixF; - float pixelOffset = (dst * (-spanLengthRcp)) + 0.5; - float subpixH = subpixG * fxaaSubpix; - - float pixelOffsetGood = goodSpan ? 
pixelOffset : 0.0; - float pixelOffsetSubpix = max(pixelOffsetGood, subpixH); - if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign; - if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign; - - return float4(FxaaTexTop(tex, posM).xyz, lumaM); -} - -#if (FXAA_GLSL_130 == 1) -float4 FxaaPass(float4 FxaaColor, float2 uv0) -#else -float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0) -#endif -{ - - #if (SHADER_MODEL >= 0x400) - FxaaTex tex; - tex.tex = Texture; - tex.smpl = TextureSampler; - - Texture.GetDimensions(PixelSize.x, PixelSize.y); - FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin); - - #elif (FXAA_GLSL_130 == 1) - - vec2 PixelSize = textureSize(TextureSampler, 0); - FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin); - - #else - FxaaTex tex; - tex = TextureSampler; - FxaaColor = FxaaPixelShader(uv0, tex, PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin); - #endif - - return FxaaColor; -} - -/*------------------------------------------------------------------------------ - [MAIN() & COMBINE PASS CODE SECTION] -------------------------------------------------------------------------------*/ -#if (FXAA_GLSL_130 == 1) - -void ps_main() -{ - vec4 color = texture(TextureSampler, PSin.t); - color = PreGammaPass(color, PSin.t); - color = FxaaPass(color, PSin.t); - - SV_Target0 = color; -} - -#else - -PS_OUTPUT ps_main(VS_OUTPUT input) -{ - PS_OUTPUT output; - - #if (SHADER_MODEL >= 0x400) - float4 color = Texture.Sample(TextureSampler, input.t); - - color = PreGammaPass(color, input.t); - color = FxaaPass(color, input.t); - #else - float4 color = tex2D(TextureSampler, input.t); - - color = PreGammaPass(color, input.t); - color = FxaaPass(color, input.t); - #endif - - output.c = color; - - return output; -} - -#endif - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/convert.glsl 
b/plugins/GSdx_legacy/res/glsl/convert.glsl deleted file mode 100644 index afa59e8816..0000000000 --- a/plugins/GSdx_legacy/res/glsl/convert.glsl +++ /dev/null @@ -1,406 +0,0 @@ -//#version 420 // Keep it for editor detection - -struct vertex_basic -{ - vec4 p; - vec2 t; -}; - - -#ifdef VERTEX_SHADER - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; -#if !pGL_ES - float gl_ClipDistance[1]; -#endif -}; - -layout(location = 0) in vec2 POSITION; -layout(location = 1) in vec2 TEXCOORD0; - -// FIXME set the interpolation (don't know what dx do) -// flat means that there is no interpolation. The value given to the fragment shader is based on the provoking vertex conventions. -// -// noperspective means that there will be linear interpolation in window-space. This is usually not what you want, but it can have its uses. -// -// smooth, the default, means to do perspective-correct interpolation. -// -// The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area. -out SHADER -{ - vec4 p; - vec2 t; -} VSout; - -#define VSout_p (VSout.p) -#define VSout_t (VSout.t) - -void vs_main() -{ - VSout_p = vec4(POSITION, 0.5f, 1.0f); - VSout_t = TEXCOORD0; - gl_Position = vec4(POSITION, 0.5f, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position -} - -#endif - -#ifdef FRAGMENT_SHADER - -in SHADER -{ - vec4 p; - vec2 t; -} PSin; - -#define PSin_p (PSin.p) -#define PSin_t (PSin.t) - -// Give a different name so I remember there is a special case! 
-#if defined(ps_main1) || defined(ps_main10) -layout(location = 0) out uint SV_Target1; -#else -layout(location = 0) out vec4 SV_Target0; -#endif - -layout(binding = 0) uniform sampler2D TextureSampler; - -layout(std140, binding = 15) uniform cb15 -{ - ivec4 ScalingFactor; -}; - -vec4 sample_c() -{ - return texture(TextureSampler, PSin_t); -} - -vec4 ps_crt(uint i) -{ - vec4 mask[4] = vec4[4] - ( - vec4(1, 0, 0, 0), - vec4(0, 1, 0, 0), - vec4(0, 0, 1, 0), - vec4(1, 1, 1, 0) - ); - return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f); -} - -#ifdef ps_main0 -void ps_main0() -{ - SV_Target0 = sample_c(); -} -#endif - -#ifdef ps_main1 -void ps_main1() -{ - // Input Color is RGBA8 - - // We want to output a pixel on the PSMCT16* format - // A1-BGR5 - -#if 0 - // Note: dot is a good idea from pseudo. However we must be careful about float accuraccy. - // Here a global idea example: - // - // SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f)); - // - - // For me this code is more accurate but it will require some tests - - vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue - - // shift Red: -3 - // shift Green: -3 + 5 - // shift Blue: -3 + 10 - // shift Alpha: -7 + 15 - highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value - - // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below - SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000)); - -#else - // Old code which is likely wrong. - - vec4 c = sample_c(); - - c.a *= 256.0f / 127.0f; // hm, 0.5 won't give us 1.0 if we just multiply with 2 - - highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000))); - - // bit field operation requires GL4 HW. 
- SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000)); -#endif - - -} -#endif - -#ifdef ps_main10 -void ps_main10() -{ - // Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture - SV_Target1 = uint(exp2(32.0f) * sample_c().r); -} -#endif - -#ifdef ps_main11 -void ps_main11() -{ - // Convert a GL_FLOAT32 depth texture into a RGBA color texture - const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f)); - const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0); - - vec4 res = fract(vec4(sample_c().r) * bitSh); - - SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f; -} -#endif - -#ifdef ps_main12 -void ps_main12() -{ - // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture - const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f)); - const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1); - uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk; - - SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f); -} -#endif - -#ifdef ps_main13 -void ps_main13() -{ - // Convert a RRGBA texture into a float depth texture - // FIXME: I'm afraid of the accuracy - const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0); - gl_FragDepth = dot(sample_c(), bitSh); -} -#endif - -#ifdef ps_main14 -void ps_main14() -{ - // Same as above but without the alpha channel (24 bits Z) - - // Convert a RRGBA texture into a float depth texture - // FIXME: I'm afraid of the accuracy - const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0); - gl_FragDepth = dot(sample_c().rgb, bitSh); -} -#endif - -#ifdef ps_main15 -void ps_main15() -{ - // Same as above but without the A/B channels (16 bits Z) - - // Convert a RRGBA texture into a float depth texture - // FIXME: I'm afraid of the accuracy - const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0); - gl_FragDepth = dot(sample_c().rg, bitSh); -} -#endif - 
-#ifdef ps_main16 -void ps_main16() -{ - // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z - // FIXME: I'm afraid of the accuracy - const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f)); - // Trunc color to drop useless lsb - vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f)); - gl_FragDepth = dot(vec4(color), bitSh); -} -#endif - -#ifdef ps_main17 -void ps_main17() -{ - - // Potential speed optimization. There is a high probability that - // game only want to extract a single channel (blue). It will allow - // to remove most of the conditional operation and yield a +2/3 fps - // boost on MGS3 - // - // Hypothesis wrong in Prince of Persia ... Seriously WTF ! - //#define ONLY_BLUE; - - // Convert a RGBA texture into a 8 bits packed texture - // Input column: 8x2 RGBA pixels - // 0: 8 RGBA - // 1: 8 RGBA - // Output column: 16x4 Index pixels - // 0: 8 R | 8 B - // 1: 8 R | 8 B - // 2: 8 G | 8 A - // 3: 8 G | 8 A - float c; - - uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); - ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1); - - int ty = tb.y | (int(gl_FragCoord.y) & 1); - int txN = tb.x | (int(gl_FragCoord.x) & 7); - int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); - - txN *= ScalingFactor.x; - txH *= ScalingFactor.x; - ty *= ScalingFactor.y; - - // TODO investigate texture gather - vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0); - vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0); - - - if ((sel.y & 4u) == 0u) { - // Column 0 and 2 -#ifdef ONLY_BLUE - c = cN.b; -#else - if ((sel.y & 3u) < 2u) { - // first 2 lines of the col - if (sel.x < 8u) - c = cN.r; - else - c = cN.b; - } else { - if (sel.x < 8u) - c = cH.g; - else - c = cH.a; - } -#endif - } else { -#ifdef ONLY_BLUE - c = cH.b; -#else - // Column 1 and 3 - if ((sel.y & 3u) < 2u) { - // first 2 lines of the col - if (sel.x < 8u) - c = cH.r; - else - c = cH.b; - } else { - if (sel.x < 8u) - c = cN.g; - else - c = cN.a; 
- } -#endif - } - - - SV_Target0 = vec4(c); -} -#endif - -#ifdef ps_main7 -void ps_main7() -{ - vec4 c = sample_c(); - - c.a = dot(c.rgb, vec3(0.299, 0.587, 0.114)); - - SV_Target0 = c; -} -#endif - -#ifdef ps_main5 -vec4 ps_scanlines(uint i) -{ - vec4 mask[2] = - { - vec4(1, 1, 1, 0), - vec4(0, 0, 0, 0) - }; - - return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f); -} - -void ps_main5() // scanlines -{ - highp uvec4 p = uvec4(gl_FragCoord); - - vec4 c = ps_scanlines(p.y % 2u); - - SV_Target0 = c; -} -#endif - -#ifdef ps_main6 -void ps_main6() // diagonal -{ - highp uvec4 p = uvec4(gl_FragCoord); - - vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u); - - SV_Target0 = c; -} -#endif - -#ifdef ps_main8 -void ps_main8() // triangular -{ - highp uvec4 p = uvec4(gl_FragCoord); - - vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u); - - SV_Target0 = c; -} -#endif - -#ifdef ps_main9 -void ps_main9() -{ - - const float PI = 3.14159265359f; - - vec2 texdim = vec2(textureSize(TextureSampler, 0)); - - vec4 c; - if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) { - c = sample_c(); - } else { - float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y)); - c = factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y)); - } - - SV_Target0 = c; -} -#endif - -// Used for DATE (stencil) -// DATM == 1 -#ifdef ps_main2 -void ps_main2() -{ - if(sample_c().a < (127.5f / 255.0f)) // >= 0x80 pass - discard; -} -#endif - -// Used for DATE (stencil) -// DATM == 0 -#ifdef ps_main3 -void ps_main3() -{ - if((127.5f / 255.0f) < sample_c().a) // < 0x80 pass (== 0x80 should not pass) - discard; -} -#endif - -#ifdef ps_main4 -void ps_main4() -{ - SV_Target0 = mod(round(sample_c() * 255.0f), 256.0f) / 255.0f; -} -#endif - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/fxaa.fx b/plugins/GSdx_legacy/res/glsl/fxaa.fx deleted file mode 120000 index f8a26fe1af..0000000000 --- a/plugins/GSdx_legacy/res/glsl/fxaa.fx +++ /dev/null @@ -1 +0,0 @@ -../fxaa.fx \ No 
newline at end of file diff --git a/plugins/GSdx_legacy/res/glsl/interlace.glsl b/plugins/GSdx_legacy/res/glsl/interlace.glsl deleted file mode 100644 index 8a79806fef..0000000000 --- a/plugins/GSdx_legacy/res/glsl/interlace.glsl +++ /dev/null @@ -1,67 +0,0 @@ -//#version 420 // Keep it for editor detection - -struct vertex_basic -{ - vec4 p; - vec2 t; -}; - -in SHADER -{ - vec4 p; - vec2 t; -} PSin; - -#define PSin_p (PSin.p) -#define PSin_t (PSin.t) - -#ifdef FRAGMENT_SHADER - -layout(location = 0) out vec4 SV_Target0; - -layout(std140, binding = 11) uniform cb11 -{ - vec2 ZrH; - float hH; -}; - -layout(binding = 0) uniform sampler2D TextureSampler; - -// TODO ensure that clip (discard) is < 0 and not <= 0 ??? -void ps_main0() -{ - if (fract(PSin_t.y * hH) - 0.5 < 0.0) - discard; - // I'm not sure it impact us but be safe to lookup texture before conditional if - // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control - vec4 c = texture(TextureSampler, PSin_t); - - SV_Target0 = c; -} - -void ps_main1() -{ - if (0.5 - fract(PSin_t.y * hH) < 0.0) - discard; - // I'm not sure it impact us but be safe to lookup texture before conditional if - // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control - vec4 c = texture(TextureSampler, PSin_t); - - SV_Target0 = c; -} - -void ps_main2() -{ - vec4 c0 = texture(TextureSampler, PSin_t - ZrH); - vec4 c1 = texture(TextureSampler, PSin_t); - vec4 c2 = texture(TextureSampler, PSin_t + ZrH); - - SV_Target0 = (c0 + c1 * 2.0f + c2) / 4.0f; -} - -void ps_main3() -{ - SV_Target0 = texture(TextureSampler, PSin_t); -} - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/merge.glsl b/plugins/GSdx_legacy/res/glsl/merge.glsl deleted file mode 100644 index 31ba8abdc3..0000000000 --- a/plugins/GSdx_legacy/res/glsl/merge.glsl +++ /dev/null @@ -1,44 +0,0 @@ -//#version 420 // Keep it for editor detection - -struct vertex_basic -{ - vec4 p; - vec2 t; -}; - -in SHADER -{ - vec4 p; - vec2 t; -} PSin; - -#define 
PSin_p (PSin.p) -#define PSin_t (PSin.t) - -#ifdef FRAGMENT_SHADER - -layout(location = 0) out vec4 SV_Target0; - -layout(std140, binding = 10) uniform cb10 -{ - vec4 BGColor; -}; - -layout(binding = 0) uniform sampler2D TextureSampler; - -void ps_main0() -{ - vec4 c = texture(TextureSampler, PSin_t); - // Note: clamping will be done by fixed unit - c.a *= 2.0f; - SV_Target0 = c; -} - -void ps_main1() -{ - vec4 c = texture(TextureSampler, PSin_t); - c.a = BGColor.a; - SV_Target0 = c; -} - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/nvidia_throughput.txt b/plugins/GSdx_legacy/res/glsl/nvidia_throughput.txt deleted file mode 100644 index ee824f8d19..0000000000 --- a/plugins/GSdx_legacy/res/glsl/nvidia_throughput.txt +++ /dev/null @@ -1,26 +0,0 @@ -Table 2. Throughput of Native Arithmetic Instructions. (Number of Operations per Clock Cycle per Multiprocessor) Compute Capability - -Architecture , FER , FER , KPL , MAX -32-bit floating-point add multiply multiply-add , 32 , 48 , 192 , 128 -64-bit floating-point add multiply multiply-add , 16 , 4 , 8 , 1 -32-bit floating-point reciprocal reciprocal square root log2f/exp2f/sine/cosine , 4 , 8 , 32 , 32 -32-bit integer add extended-precision add subtract extended-precision subtract , 32 , 48 , 160 , 128 -32-bit integer multiply multiply-add extended-precision multiply-add , 16 , 16 , 32 , Multiple instructions -32-bit integer shift , 16 , 16 , 32 , 64 -compare minimum maximum , 32 , 48 , 160 , 64 -32-bit integer bit reverse bit field extract/insert , 16 , 16 , 32 , 64 -32-bit bitwise AND / OR / XOR , 32 , 160 , 160 , 128 -count of leading zeros most significant non-sign bit , 16 , 16 , 32 , Multiple instructions -population count , 16 , 16 , 32 , 32 -warp shuffle , N/A , N/A , 32 , 32 -sum of absolute difference , 16 , 16 , 32 , 64 -SIMD video instructions vabsdiff2 , N/A , N/A , 160 , Multiple instructions -SIMD video instructions vabsdiff4 , N/A , N/A , 160 , Multiple instructions -All other SIMD video instructions 
, 16 , 16 , 32 , Multiple instructions -Type conversions from 8/16-bit integer to 32-bit types , 16 , 16 , 128 , 32 -Type conversions from and to 64-bit types , 16 , 4 , 8 , 4 -All other type conversions , 16 , 16 , 32 , 32 - - -Some tips: -* bit field operations are as fast as shift operations. diff --git a/plugins/GSdx_legacy/res/glsl/shadeboost.glsl b/plugins/GSdx_legacy/res/glsl/shadeboost.glsl deleted file mode 100644 index 717e8fe133..0000000000 --- a/plugins/GSdx_legacy/res/glsl/shadeboost.glsl +++ /dev/null @@ -1,71 +0,0 @@ -//#version 420 // Keep it for editor detection - -/* -** Contrast, saturation, brightness -** Code of this function is from TGM's shader pack -** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 -** TGM's author comment about the license (included in the previous link) -** "do with it, what you want! its total free! -** (but would be nice, if you say that you used my shaders :wink: ) but not necessary" -*/ - -struct vertex_basic -{ - vec4 p; - vec2 t; -}; - -#ifdef FRAGMENT_SHADER - -in SHADER -{ - vec4 p; - vec2 t; -} PSin; - -#define PSin_p (PSin.p) -#define PSin_t (PSin.t) - -layout(location = 0) out vec4 SV_Target0; - -layout(std140, binding = 12) uniform cb12 -{ - vec4 BGColor; -}; - -layout(binding = 0) uniform sampler2D TextureSampler; - -// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% -vec4 ContrastSaturationBrightness(vec4 color) -{ - const float sat = SB_SATURATION / 50.0; - const float brt = SB_BRIGHTNESS / 50.0; - const float con = SB_CONTRAST / 50.0; - - // Increase or decrease these values to adjust r, g and b color channels separately - const float AvgLumR = 0.5; - const float AvgLumG = 0.5; - const float AvgLumB = 0.5; - - const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721); - - vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB); - vec3 brtColor = color.rgb * brt; - float dot_intensity = dot(brtColor, LumCoeff); - vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity); - vec3 satColor = 
mix(intensity, brtColor, sat); - vec3 conColor = mix(AvgLumin, satColor, con); - - color.rgb = conColor; - return color; -} - - -void ps_main() -{ - vec4 c = texture(TextureSampler, PSin_t); - SV_Target0 = ContrastSaturationBrightness(c); -} - - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/tfx_fs.glsl b/plugins/GSdx_legacy/res/glsl/tfx_fs.glsl deleted file mode 100644 index cb756352d0..0000000000 --- a/plugins/GSdx_legacy/res/glsl/tfx_fs.glsl +++ /dev/null @@ -1,614 +0,0 @@ -//#version 420 // Keep it for text editor detection - -// Require for bit operation -//#extension GL_ARB_gpu_shader5 : enable - -#define FMT_32 0 -#define FMT_24 1 -#define FMT_16 2 - -#define PS_PAL_FMT (PS_TEX_FMT >> 2) -#define PS_AEM_FMT (PS_TEX_FMT & 3) - -// APITRACE_DEBUG enables forced pixel output to easily detect -// the fragment computed by primitive -#define APITRACE_DEBUG 0 -// TEX_COORD_DEBUG output the uv coordinate as color. It is useful -// to detect bad sampling due to upscaling -//#define TEX_COORD_DEBUG -// Just copy directly the texture coordinate -#ifdef TEX_COORD_DEBUG -#define PS_TFX 1 -#define PS_TCC 1 -#endif - -#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) - -#ifdef FRAGMENT_SHADER - -in SHADER -{ - vec4 t_float; - vec4 t_int; - vec4 c; - flat vec4 fc; -} PSin; - -#define PSin_c (PSin.c) -#define PSin_fc (PSin.fc) - -// Same buffer but 2 colors for dual source blending -layout(location = 0, index = 0) out vec4 SV_Target0; -layout(location = 0, index = 1) out vec4 SV_Target1; - -layout(binding = 0) uniform sampler2D TextureSampler; -layout(binding = 1) uniform sampler2D PaletteSampler; -layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the image below - -#ifndef DISABLE_GL42_image -#if PS_DATE > 0 -// FIXME how to declare memory access -layout(r32i, binding = 2) uniform iimage2D img_prim_min; -// WARNING: -// You can't enable it if you discard the fragment. 
The depth is still -// updated (shadow in Shin Megami Tensei Nocturne) -// -// early_fragment_tests must still be enabled in the first pass of the 2 passes algo -// First pass search the first primitive that will write the bad alpha value. Value -// won't be written if the fragment fails the depth test. -// -// In theory the best solution will be do -// 1/ copy the depth buffer -// 2/ do the full depth (current depth writes are disabled) -// 3/ restore the depth buffer for 2nd pass -// Of course, it is likely too costly. -#if PS_DATE == 1 || PS_DATE == 2 -layout(early_fragment_tests) in; -#endif - -// I don't remember why I set this parameter but it is surely useless -//layout(pixel_center_integer) in vec4 gl_FragCoord; -#endif -#else -// use basic stencil -#endif - - -// Warning duplicated in both GLSL file -layout(std140, binding = 21) uniform cb21 -{ - vec3 FogColor; - float AREF; - - vec4 WH; - - vec2 TA; - float _pad0; - float Af; - - uvec4 MskFix; - - uvec4 FbMask; - - vec4 HalfTexel; - - vec4 MinMax; - - vec2 TextureScale; - vec2 TC_OffsetHack; -}; - -vec4 sample_c(vec2 uv) -{ - return texture(TextureSampler, uv); -} - -vec4 sample_p(float idx) -{ - return texture(PaletteSampler, vec2(idx, 0.0f)); -} - -vec4 clamp_wrap_uv(vec4 uv) -{ - vec4 uv_out = uv; - -#if PS_WMS == PS_WMT - -#if PS_WMS == 2 - uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); -#elif PS_WMS == 3 - uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; -#endif - -#else // PS_WMS != PS_WMT - -#if PS_WMS == 2 - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - -#elif PS_WMS == 3 - uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; - -#endif - -#if PS_WMT == 2 - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - -#elif PS_WMT == 3 - - uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; -#endif - -#endif - - return uv_out; -} - -mat4 sample_4c(vec4 uv) -{ - mat4 c; - - // Note: texture gather 
can't be used because of special clamping/wrapping - // Also it doesn't support lod - c[0] = sample_c(uv.xy); - c[1] = sample_c(uv.zy); - c[2] = sample_c(uv.xw); - c[3] = sample_c(uv.zw); - - return c; -} - -vec4 sample_4_index(vec4 uv) -{ - vec4 c; - - // Either GSdx will send a texture that contains a single channel - // in this case the red channel is remapped as alpha channel - // - // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel - - // Note: texture gather can't be used because of special clamping/wrapping - // Also it doesn't support lod - c.x = sample_c(uv.xy).a; - c.y = sample_c(uv.zy).a; - c.z = sample_c(uv.xw).a; - c.w = sample_c(uv.zw).a; - - uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value - -#if PS_PAL_FMT == 1 - // 4HL - return vec4(i & 0xFu) / 255.0f; - -#elif PS_PAL_FMT == 2 - // 4HH - return vec4(i >> 4u) / 255.0f; - -#else - // Most of texture will hit this code so keep normalized float value - - // 8 bits - return c; -#endif - -} - -mat4 sample_4p(vec4 u) -{ - mat4 c; - - c[0] = sample_p(u.x); - c[1] = sample_p(u.y); - c[2] = sample_p(u.z); - c[3] = sample_p(u.w); - - return c; -} - -vec4 sample_color(vec2 st) -{ -#if (PS_TCOFFSETHACK == 1) - st += TC_OffsetHack.xy; -#endif - - vec4 t; - mat4 c; - vec2 dd; - - // FIXME I'm not sure this condition is useful (I think code will be optimized) -#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2) - // No software LTF and pure 32 bits RGBA texure without special texture wrapping - c[0] = sample_c(st); -#ifdef TEX_COORD_DEBUG - c[0].rg = st.xy; -#endif - -#else - vec4 uv; - - if(PS_LTF != 0) - { - uv = st.xyxy + HalfTexel; - dd = fract(uv.xy * WH.zw); -#if (PS_FST == 0) - // Background in Shin Megami Tensei Lucifers - // I suspect that uv isn't a standard number, so fract is outside of the [0;1] range - // Note: it is free on GPU but let's do it only for float coordinate - // Strangely Dx doesn't suffer from this issue. 
- dd = clamp(dd, vec2(0.0f), vec2(1.0f)); -#endif - } - else - { - uv = st.xyxy; - } - - uv = clamp_wrap_uv(uv); - -#if PS_PAL_FMT != 0 - c = sample_4p(sample_4_index(uv)); -#else - c = sample_4c(uv); -#endif - -#ifdef TEX_COORD_DEBUG - c[0].rg = uv.xy; - c[1].rg = uv.xy; - c[2].rg = uv.xy; - c[3].rg = uv.xy; -#endif - -#endif - - // PERF note: using dot product reduces by 1 the number of instruction - // but I'm not sure it is equivalent neither faster. - for (int i = 0; i < 4; i++) - { - //float sum = dot(c[i].rgb, vec3(1.0f)); -#if (PS_AEM_FMT == FMT_24) - c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; -#elif (PS_AEM_FMT == FMT_16) - c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; -#endif - } - -#if(PS_LTF != 0) - t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); -#else - t = c[0]; -#endif - - // The 0.05f helps to fix the overbloom of sotc - // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit) - // interpolation could be slightly below the correct one. 
- return trunc(t * 255.0f + 0.05f); -} - -vec4 tfx(vec4 T, vec4 C) -{ - vec4 C_out; - vec4 FxT = trunc(trunc(C) * T / 128.0f); - -#if (PS_TFX == 0) - C_out = FxT; -#elif (PS_TFX == 1) - C_out = T; -#elif (PS_TFX == 2) - C_out.rgb = FxT.rgb + C.a; - C_out.a = T.a + C.a; -#elif (PS_TFX == 3) - C_out.rgb = FxT.rgb + C.a; - C_out.a = T.a; -#else - C_out = C; -#endif - -#if (PS_TCC == 0) - C_out.a = C.a; -#endif - -#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3) - // Clamp only when it is useful - C_out = min(C_out, 255.0f); -#endif - - return C_out; -} - -void atst(vec4 C) -{ - // FIXME use integer cmp - float a = C.a; - -#if (PS_ATST == 0) // never - discard; -#elif (PS_ATST == 1) // always - // nothing to do -#elif (PS_ATST == 2) // l - if ((AREF - a - 0.5f) < 0.0f) - discard; -#elif (PS_ATST == 3 ) // le - if ((AREF - a + 0.5f) < 0.0f) - discard; -#elif (PS_ATST == 4) // e - if ((0.5f - abs(a - AREF)) < 0.0f) - discard; -#elif (PS_ATST == 5) // ge - if ((a-AREF + 0.5f) < 0.0f) - discard; -#elif (PS_ATST == 6) // g - if ((a-AREF - 0.5f) < 0.0f) - discard; -#elif (PS_ATST == 7) // ne - if ((abs(a - AREF) - 0.5f) < 0.0f) - discard; -#endif -} - -void fog(inout vec4 C, float f) -{ -#if PS_FOG != 0 - C.rgb = trunc(mix(FogColor, C.rgb, f)); -#endif -} - -vec4 ps_color() -{ - //FIXME: maybe we can set gl_Position.w = q in VS -#if (PS_FST == 0) - vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w)); -#else - // Note xy are normalized coordinate - vec4 T = sample_color(PSin.t_int.xy); -#endif - -#if PS_IIP == 1 - vec4 C = tfx(T, PSin_c); -#else - vec4 C = tfx(T, PSin_fc); -#endif - - atst(C); - - fog(C, PSin.t_float.z); - -#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes - C.rgb = vec3(255.0f); -#endif - - return C; -} - -void ps_fbmask(inout vec4 C) -{ - // FIXME do I need special case for 16 bits -#if PS_FBMASK - vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); - C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & 
FbMask)); -#endif -} - -void ps_blend(inout vec4 Color, float As) -{ -#if SW_BLEND - vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); - -#if PS_DFMT == FMT_24 - float Ad = 1.0f; -#else - // FIXME FMT_16 case - // FIXME Ad or Ad * 2? - float Ad = RT.a / 128.0f; -#endif - - // Let the compiler do its jobs ! - vec3 Cd = RT.rgb; - vec3 Cs = Color.rgb; - -#if PS_BLEND_A == 0 - vec3 A = Cs; -#elif PS_BLEND_A == 1 - vec3 A = Cd; -#else - vec3 A = vec3(0.0f); -#endif - -#if PS_BLEND_B == 0 - vec3 B = Cs; -#elif PS_BLEND_B == 1 - vec3 B = Cd; -#else - vec3 B = vec3(0.0f); -#endif - -#if PS_BLEND_C == 0 - float C = As; -#elif PS_BLEND_C == 1 - float C = Ad; -#else - float C = Af; -#endif - -#if PS_BLEND_D == 0 - vec3 D = Cs; -#elif PS_BLEND_D == 1 - vec3 D = Cd; -#else - vec3 D = vec3(0.0f); -#endif - -#if PS_BLEND_A == PS_BLEND_B - Color.rgb = D; -#else - Color.rgb = trunc((A - B) * C + D); -#endif - - // FIXME dithering - - // Correct the Color value based on the output format -#if PS_COLCLIP == 0 && PS_HDR == 0 - // Standard Clamp - Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f)); -#endif - - // FIXME rouding of negative float? - // compiler uses trunc but it might need floor - - // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy - // GS: Color = 1, Alpha = 255 => output 1 - // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 -#if PS_DFMT == FMT_16 - // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania - - Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8)); -#elif PS_COLCLIP == 1 && PS_HDR == 0 - Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF)); -#endif - -#endif -} - -void ps_main() -{ -#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2) - -#if PS_WRITE_RG == 1 - // Pseudo 16 bits access. 
- float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g; -#else - float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; -#endif - -#if (PS_DATE & 3) == 1 - // DATM == 0: Pixel with alpha equal to 1 will failed - bool bad = (127.5f / 255.0f) < rt_a; -#elif (PS_DATE & 3) == 2 - // DATM == 1: Pixel with alpha equal to 0 will failed - bool bad = rt_a < (127.5f / 255.0f); -#endif - - if (bad) { -#if PS_DATE >= 5 || defined(DISABLE_GL42_image) - discard; -#else - imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); - return; -#endif - } - -#endif - -#if PS_DATE == 3 && !defined(DISABLE_GL42_image) - int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r; - // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update - // the bad alpha value so we must keep it. - - if (gl_PrimitiveID > stencil_ceil) { - discard; - } -#endif - - vec4 C = ps_color(); -#if (APITRACE_DEBUG & 1) == 1 - C.r = 255f; -#endif -#if (APITRACE_DEBUG & 2) == 2 - C.g = 255f; -#endif -#if (APITRACE_DEBUG & 4) == 4 - C.b = 255f; -#endif -#if (APITRACE_DEBUG & 8) == 8 - C.a = 128f; -#endif - -#if PS_SHUFFLE - uvec4 denorm_c = uvec4(C); - uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); - - // Write RB part. Mask will take care of the correct destination -#if PS_READ_BA - C.rb = C.bb; -#else - C.rb = C.rr; -#endif - - // FIXME precompute my_TA & 0x80 - - // Write GA part. Mask will take care of the correct destination - // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n" - // However Nvidia emulate it with an if (at least on kepler arch) ...\n" -#if PS_READ_BA - // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below - // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? 
denorm_TA.y : denorm_TA.x; - // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1); - // c.ga = vec2(float(denorm_c.a)); - - if (bool(denorm_c.a & 0x80u)) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); - -#else - if (bool(denorm_c.g & 0x80u)) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - - // Nice idea but step/mix requires 4 instructions - // set / trunc / I2F / Mad - // - // float sel = step(128.0f, c.g); - // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)); - // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel); -#endif - -#endif - - // Must be done before alpha correction - float alpha_blend = C.a / 128.0f; - - // Correct the ALPHA value based on the output format -#if (PS_DFMT == FMT_16) - float A_one = 128.0f; // alpha output will be 0x80 - C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; -#elif (PS_DFMT == FMT_32) && (PS_FBA != 0) - if(C.a < 128.0f) C.a += 128.0f; -#endif - - // Get first primitive that will write a failling alpha value -#if PS_DATE == 1 && !defined(DISABLE_GL42_image) - // DATM == 0 - // Pixel with alpha equal to 1 will failed (128-255) - if (C.a > 127.5f) { - imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); - } - return; -#elif PS_DATE == 2 && !defined(DISABLE_GL42_image) - // DATM == 1 - // Pixel with alpha equal to 0 will failed (0-127) - if (C.a < 127.5f) { - imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); - } - return; -#endif - - ps_blend(C, alpha_blend); - - ps_fbmask(C); - -#if PS_HDR == 1 - // Use negative value to avoid overflow of the texture (in accumulation mode) - // Note: code were initially done for an Half-Float texture. Due to overflow - // the texture was upgraded to a full float. Maybe this code is useless now! 
- // Good testcase is castlevania - if (any(greaterThan(C.rgb, vec3(128.0f)))) { - C.rgb = (C.rgb - 256.0f); - } -#endif - SV_Target0 = C / 255.0f; - SV_Target1 = vec4(alpha_blend); -} - -#endif diff --git a/plugins/GSdx_legacy/res/glsl/tfx_vgs.glsl b/plugins/GSdx_legacy/res/glsl/tfx_vgs.glsl deleted file mode 100644 index c89720d644..0000000000 --- a/plugins/GSdx_legacy/res/glsl/tfx_vgs.glsl +++ /dev/null @@ -1,263 +0,0 @@ -//#version 420 // Keep it for text editor detection - -layout(std140, binding = 20) uniform cb20 -{ - vec2 VertexScale; - vec2 VertexOffset; - vec2 _removed_TextureScale; - vec2 PointSize; -}; - -// Warning duplicated in both GLSL file -layout(std140, binding = 21) uniform cb21 -{ - vec3 FogColor; - float AREF; - - vec4 WH; - - vec2 TA; - float _pad0; - float Af; - - uvec4 MskFix; - - uvec4 FbMask; - - vec4 HalfTexel; - - vec4 MinMax; - - vec2 TextureScale; - vec2 TC_OffsetHack; -}; - -#ifdef VERTEX_SHADER -layout(location = 0) in vec2 i_st; -layout(location = 2) in vec4 i_c; -layout(location = 3) in float i_q; -layout(location = 4) in uvec2 i_p; -layout(location = 5) in uint i_z; -layout(location = 6) in uvec2 i_uv; -layout(location = 7) in vec4 i_f; - -out SHADER -{ - vec4 t_float; - vec4 t_int; - vec4 c; - flat vec4 fc; -} VSout; - -#define VSout_c (VSout.c) -#define VSout_fc (VSout.fc) - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; -#if !pGL_ES - float gl_ClipDistance[1]; -#endif -}; - -#ifdef ZERO_TO_ONE_DEPTH -const float exp_min32 = exp2(-32.0f); -#else -const float exp_min31 = exp2(-31.0f); -#endif - -void texture_coord() -{ - vec2 uv = (VS_WILDHACK == 1) ? 
vec2(i_uv & uvec2(0x3FEF, 0x3FEF)) : vec2(i_uv); - - // Float coordinate - VSout.t_float.xy = i_st; - VSout.t_float.w = i_q; - - // Integer coordinate => normalized - VSout.t_int.xy = uv * TextureScale; - // Integer coordinate => integral - VSout.t_int.zw = uv; -} - -void vs_main() -{ - highp uint z; - if(VS_BPPZ == 1) // 24 - z = i_z & uint(0xffffff); - else if(VS_BPPZ == 2) // 16 - z = i_z & uint(0xffff); - else - z = i_z; - - // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) - // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty - // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel - // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 - vec4 p; - - p.xy = vec2(i_p) - vec2(0.05f, 0.05f); - p.xy = p.xy * VertexScale - VertexOffset; - p.w = 1.0f; -#ifdef ZERO_TO_ONE_DEPTH - if(VS_LOGZ == 1) { - p.z = max(0.0f, log2(float(z))) / 32.0f; - } else { - p.z = float(z) * exp_min32; - } -#else - if(VS_LOGZ == 1) { - p.z = max(0.0f, log2(float(z))) / 31.0f - 1.0f; - } else { - p.z = float(z) * exp_min31 - 1.0f; - } -#endif - - gl_Position = p; - - texture_coord(); - - VSout_c = i_c; - VSout_fc = i_c; - VSout.t_float.z = i_f.x; // pack for with texture -} - -#endif - -#ifdef GEOMETRY_SHADER - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; -#if !pGL_ES - float gl_ClipDistance[1]; -#endif -} gl_in[]; -//in int gl_PrimitiveIDIn; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; -#if !pGL_ES - float gl_ClipDistance[1]; -#endif -}; -//out int gl_PrimitiveID; - -in SHADER -{ - vec4 t_float; - vec4 t_int; - vec4 c; - flat vec4 fc; -} GSin[]; - -out SHADER -{ - vec4 t_float; - vec4 t_int; - vec4 c; - flat vec4 fc; -} GSout; - -layout(std140, binding = 22) uniform cb22 -{ - vec4 rt_size; -}; - - -struct vertex -{ - vec4 t_float; - 
vec4 t_int; - vec4 c; -}; - -void out_vertex(in vertex v) -{ - GSout.t_float = v.t_float; - GSout.t_int = v.t_int; - GSout.c = v.c; - // Flat output -#if GS_POINT == 1 - GSout.fc = GSin[0].fc; -#else - GSout.fc = GSin[1].fc; -#endif - gl_PrimitiveID = gl_PrimitiveIDIn; - EmitVertex(); -} - -#if GS_POINT == 1 -layout(points) in; -#else -layout(lines) in; -#endif -layout(triangle_strip, max_vertices = 6) out; - -void gs_main() -{ - // left top => GSin[0]; - // right bottom => GSin[1]; -#if GS_POINT == 1 - vertex rb = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c); -#else - vertex rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c); -#endif - vertex lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c); - -#if GS_POINT == 1 - vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f); -#else - vec4 rb_p = gl_in[1].gl_Position; -#endif - vec4 lb_p = rb_p; - vec4 rt_p = rb_p; - vec4 lt_p = gl_in[0].gl_Position; - -#if GS_POINT == 0 - // flat depth - lt_p.z = rb_p.z; - // flat fog and texture perspective - lt.t_float.zw = rb.t_float.zw; - // flat color - lt.c = rb.c; -#endif - - // Swap texture and position coordinate - vertex lb = rb; - lb.t_float.x = lt.t_float.x; - lb.t_int.x = lt.t_int.x; - lb.t_int.z = lt.t_int.z; - lb_p.x = lt_p.x; - - vertex rt = rb; - rt_p.y = lt_p.y; - rt.t_float.y = lt.t_float.y; - rt.t_int.y = lt.t_int.y; - rt.t_int.w = lt.t_int.w; - - // Triangle 1 - gl_Position = lt_p; - out_vertex(lt); - - gl_Position = lb_p; - out_vertex(lb); - - gl_Position = rt_p; - out_vertex(rt); - EndPrimitive(); - - // Triangle 2 - gl_Position = lb_p; - out_vertex(lb); - - gl_Position = rt_p; - out_vertex(rt); - - gl_Position = rb_p; - out_vertex(rb); - EndPrimitive(); -} - -#endif diff --git a/plugins/GSdx_legacy/res/glsl_source.h b/plugins/GSdx_legacy/res/glsl_source.h deleted file mode 100644 index f2fc92f1be..0000000000 --- a/plugins/GSdx_legacy/res/glsl_source.h +++ /dev/null @@ -1,2097 +0,0 @@ -/* - * This file was generated by 
glsl2h.pl script - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#include "stdafx.h" - -static const char* convert_glsl = - "//#version 420 // Keep it for editor detection\n" - "\n" - "struct vertex_basic\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "};\n" - "\n" - "\n" - "#ifdef VERTEX_SHADER\n" - "\n" - "out gl_PerVertex {\n" - " vec4 gl_Position;\n" - " float gl_PointSize;\n" - "#if !pGL_ES\n" - " float gl_ClipDistance[1];\n" - "#endif\n" - "};\n" - "\n" - "layout(location = 0) in vec2 POSITION;\n" - "layout(location = 1) in vec2 TEXCOORD0;\n" - "\n" - "// FIXME set the interpolation (don't know what dx do)\n" - "// flat means that there is no interpolation. The value given to the fragment shader is based on the provoking vertex conventions.\n" - "//\n" - "// noperspective means that there will be linear interpolation in window-space. This is usually not what you want, but it can have its uses.\n" - "//\n" - "// smooth, the default, means to do perspective-correct interpolation.\n" - "//\n" - "// The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. 
This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area.\n" - "out SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} VSout;\n" - "\n" - "#define VSout_p (VSout.p)\n" - "#define VSout_t (VSout.t)\n" - "\n" - "void vs_main()\n" - "{\n" - " VSout_p = vec4(POSITION, 0.5f, 1.0f);\n" - " VSout_t = TEXCOORD0;\n" - " gl_Position = vec4(POSITION, 0.5f, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n" - "}\n" - "\n" - "#endif\n" - "\n" - "#ifdef FRAGMENT_SHADER\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} PSin;\n" - "\n" - "#define PSin_p (PSin.p)\n" - "#define PSin_t (PSin.t)\n" - "\n" - "// Give a different name so I remember there is a special case!\n" - "#if defined(ps_main1) || defined(ps_main10)\n" - "layout(location = 0) out uint SV_Target1;\n" - "#else\n" - "layout(location = 0) out vec4 SV_Target0;\n" - "#endif\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "\n" - "layout(std140, binding = 15) uniform cb15\n" - "{\n" - " ivec4 ScalingFactor;\n" - "};\n" - "\n" - "vec4 sample_c()\n" - "{\n" - " return texture(TextureSampler, PSin_t);\n" - "}\n" - "\n" - "vec4 ps_crt(uint i)\n" - "{\n" - " vec4 mask[4] = vec4[4]\n" - " (\n" - " vec4(1, 0, 0, 0),\n" - " vec4(0, 1, 0, 0),\n" - " vec4(0, 0, 1, 0),\n" - " vec4(1, 1, 1, 0)\n" - " );\n" - " return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n" - "}\n" - "\n" - "#ifdef ps_main0\n" - "void ps_main0()\n" - "{\n" - " SV_Target0 = sample_c();\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main1\n" - "void ps_main1()\n" - "{\n" - " // Input Color is RGBA8\n" - "\n" - " // We want to output a pixel on the PSMCT16* format\n" - " // A1-BGR5\n" - "\n" - "#if 0\n" - " // Note: dot is a good idea from pseudo. 
However we must be careful about float accuraccy.\n" - " // Here a global idea example:\n" - " //\n" - " // SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f));\n" - " //\n" - "\n" - " // For me this code is more accurate but it will require some tests\n" - "\n" - " vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue\n" - "\n" - " // shift Red: -3\n" - " // shift Green: -3 + 5\n" - " // shift Blue: -3 + 10\n" - " // shift Alpha: -7 + 15\n" - " highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n" - "\n" - " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" - " SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n" - "\n" - "#else\n" - " // Old code which is likely wrong.\n" - "\n" - " vec4 c = sample_c();\n" - "\n" - " c.a *= 256.0f / 127.0f; // hm, 0.5 won't give us 1.0 if we just multiply with 2\n" - "\n" - " highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n" - "\n" - " // bit field operation requires GL4 HW.\n" - " SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n" - "#endif\n" - "\n" - "\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main10\n" - "void ps_main10()\n" - "{\n" - " // Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture\n" - " SV_Target1 = uint(exp2(32.0f) * sample_c().r);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main11\n" - "void ps_main11()\n" - "{\n" - " // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n" - " const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n" - " const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n" - "\n" - " vec4 res = fract(vec4(sample_c().r) * bitSh);\n" - "\n" - " SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef 
ps_main12\n" - "void ps_main12()\n" - "{\n" - " // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n" - " const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n" - " const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n" - " uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk;\n" - "\n" - " SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main13\n" - "void ps_main13()\n" - "{\n" - " // Convert a RRGBA texture into a float depth texture\n" - " // FIXME: I'm afraid of the accuracy\n" - " const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);\n" - " gl_FragDepth = dot(sample_c(), bitSh);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main14\n" - "void ps_main14()\n" - "{\n" - " // Same as above but without the alpha channel (24 bits Z)\n" - "\n" - " // Convert a RRGBA texture into a float depth texture\n" - " // FIXME: I'm afraid of the accuracy\n" - " const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);\n" - " gl_FragDepth = dot(sample_c().rgb, bitSh);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main15\n" - "void ps_main15()\n" - "{\n" - " // Same as above but without the A/B channels (16 bits Z)\n" - "\n" - " // Convert a RRGBA texture into a float depth texture\n" - " // FIXME: I'm afraid of the accuracy\n" - " const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);\n" - " gl_FragDepth = dot(sample_c().rg, bitSh);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main16\n" - "void ps_main16()\n" - "{\n" - " // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z\n" - " // FIXME: I'm afraid of the accuracy\n" - " const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));\n" - " // Trunc color to drop useless lsb\n" - " vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));\n" - " gl_FragDepth = dot(vec4(color), bitSh);\n" - "}\n" - "#endif\n" - "\n" 
- "#ifdef ps_main17\n" - "void ps_main17()\n" - "{\n" - "\n" - " // Potential speed optimization. There is a high probability that\n" - " // game only want to extract a single channel (blue). It will allow\n" - " // to remove most of the conditional operation and yield a +2/3 fps\n" - " // boost on MGS3\n" - " //\n" - " // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n" - " //#define ONLY_BLUE;\n" - "\n" - " // Convert a RGBA texture into a 8 bits packed texture\n" - " // Input column: 8x2 RGBA pixels\n" - " // 0: 8 RGBA\n" - " // 1: 8 RGBA\n" - " // Output column: 16x4 Index pixels\n" - " // 0: 8 R | 8 B\n" - " // 1: 8 R | 8 B\n" - " // 2: 8 G | 8 A\n" - " // 3: 8 G | 8 A\n" - " float c;\n" - "\n" - " uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n" - " ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1);\n" - "\n" - " int ty = tb.y | (int(gl_FragCoord.y) & 1);\n" - " int txN = tb.x | (int(gl_FragCoord.x) & 7);\n" - " int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n" - "\n" - " txN *= ScalingFactor.x;\n" - " txH *= ScalingFactor.x;\n" - " ty *= ScalingFactor.y;\n" - "\n" - " // TODO investigate texture gather\n" - " vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n" - " vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n" - "\n" - "\n" - " if ((sel.y & 4u) == 0u) {\n" - " // Column 0 and 2\n" - "#ifdef ONLY_BLUE\n" - " c = cN.b;\n" - "#else\n" - " if ((sel.y & 3u) < 2u) {\n" - " // first 2 lines of the col\n" - " if (sel.x < 8u)\n" - " c = cN.r;\n" - " else\n" - " c = cN.b;\n" - " } else {\n" - " if (sel.x < 8u)\n" - " c = cH.g;\n" - " else\n" - " c = cH.a;\n" - " }\n" - "#endif\n" - " } else {\n" - "#ifdef ONLY_BLUE\n" - " c = cH.b;\n" - "#else\n" - " // Column 1 and 3\n" - " if ((sel.y & 3u) < 2u) {\n" - " // first 2 lines of the col\n" - " if (sel.x < 8u)\n" - " c = cH.r;\n" - " else\n" - " c = cH.b;\n" - " } else {\n" - " if (sel.x < 8u)\n" - " c = cN.g;\n" - " else\n" - " c = cN.a;\n" - " }\n" - "#endif\n" - " 
}\n" - "\n" - "\n" - " SV_Target0 = vec4(c);\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main7\n" - "void ps_main7()\n" - "{\n" - " vec4 c = sample_c();\n" - "\n" - " c.a = dot(c.rgb, vec3(0.299, 0.587, 0.114));\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main5\n" - "vec4 ps_scanlines(uint i)\n" - "{\n" - " vec4 mask[2] =\n" - " {\n" - " vec4(1, 1, 1, 0),\n" - " vec4(0, 0, 0, 0)\n" - " };\n" - "\n" - " return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n" - "}\n" - "\n" - "void ps_main5() // scanlines\n" - "{\n" - " highp uvec4 p = uvec4(gl_FragCoord);\n" - "\n" - " vec4 c = ps_scanlines(p.y % 2u);\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main6\n" - "void ps_main6() // diagonal\n" - "{\n" - " highp uvec4 p = uvec4(gl_FragCoord);\n" - "\n" - " vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u);\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main8\n" - "void ps_main8() // triangular\n" - "{\n" - " highp uvec4 p = uvec4(gl_FragCoord);\n" - "\n" - " vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u);\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main9\n" - "void ps_main9()\n" - "{\n" - "\n" - " const float PI = 3.14159265359f;\n" - "\n" - " vec2 texdim = vec2(textureSize(TextureSampler, 0));\n" - "\n" - " vec4 c;\n" - " if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) {\n" - " c = sample_c();\n" - " } else {\n" - " float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y));\n" - " c = factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y));\n" - " }\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "#endif\n" - "\n" - "// Used for DATE (stencil)\n" - "// DATM == 1\n" - "#ifdef ps_main2\n" - "void ps_main2()\n" - "{\n" - " if(sample_c().a < (127.5f / 255.0f)) // >= 0x80 pass\n" - " discard;\n" - "}\n" - "#endif\n" - "\n" - "// Used for DATE (stencil)\n" - "// DATM == 0\n" - "#ifdef ps_main3\n" - "void ps_main3()\n" 
- "{\n" - " if((127.5f / 255.0f) < sample_c().a) // < 0x80 pass (== 0x80 should not pass)\n" - " discard;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef ps_main4\n" - "void ps_main4()\n" - "{\n" - " SV_Target0 = mod(round(sample_c() * 255.0f), 256.0f) / 255.0f;\n" - "}\n" - "#endif\n" - "\n" - "#endif\n" - ; - -static const char* interlace_glsl = - "//#version 420 // Keep it for editor detection\n" - "\n" - "struct vertex_basic\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "};\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} PSin;\n" - "\n" - "#define PSin_p (PSin.p)\n" - "#define PSin_t (PSin.t)\n" - "\n" - "#ifdef FRAGMENT_SHADER\n" - "\n" - "layout(location = 0) out vec4 SV_Target0;\n" - "\n" - "layout(std140, binding = 11) uniform cb11\n" - "{\n" - " vec2 ZrH;\n" - " float hH;\n" - "};\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "\n" - "// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n" - "void ps_main0()\n" - "{\n" - " if (fract(PSin_t.y * hH) - 0.5 < 0.0)\n" - " discard;\n" - " // I'm not sure it impact us but be safe to lookup texture before conditional if\n" - " // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n" - " vec4 c = texture(TextureSampler, PSin_t);\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "\n" - "void ps_main1()\n" - "{\n" - " if (0.5 - fract(PSin_t.y * hH) < 0.0)\n" - " discard;\n" - " // I'm not sure it impact us but be safe to lookup texture before conditional if\n" - " // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n" - " vec4 c = texture(TextureSampler, PSin_t);\n" - "\n" - " SV_Target0 = c;\n" - "}\n" - "\n" - "void ps_main2()\n" - "{\n" - " vec4 c0 = texture(TextureSampler, PSin_t - ZrH);\n" - " vec4 c1 = texture(TextureSampler, PSin_t);\n" - " vec4 c2 = texture(TextureSampler, PSin_t + ZrH);\n" - "\n" - " SV_Target0 = (c0 + c1 * 2.0f + c2) / 4.0f;\n" - "}\n" - "\n" - "void ps_main3()\n" - "{\n" - " SV_Target0 = texture(TextureSampler, 
PSin_t);\n" - "}\n" - "\n" - "#endif\n" - ; - -static const char* merge_glsl = - "//#version 420 // Keep it for editor detection\n" - "\n" - "struct vertex_basic\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "};\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} PSin;\n" - "\n" - "#define PSin_p (PSin.p)\n" - "#define PSin_t (PSin.t)\n" - "\n" - "#ifdef FRAGMENT_SHADER\n" - "\n" - "layout(location = 0) out vec4 SV_Target0;\n" - "\n" - "layout(std140, binding = 10) uniform cb10\n" - "{\n" - " vec4 BGColor;\n" - "};\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "\n" - "void ps_main0()\n" - "{\n" - " vec4 c = texture(TextureSampler, PSin_t);\n" - " // Note: clamping will be done by fixed unit\n" - " c.a *= 2.0f;\n" - " SV_Target0 = c;\n" - "}\n" - "\n" - "void ps_main1()\n" - "{\n" - " vec4 c = texture(TextureSampler, PSin_t);\n" - " c.a = BGColor.a;\n" - " SV_Target0 = c;\n" - "}\n" - "\n" - "#endif\n" - ; - -static const char* shadeboost_glsl = - "//#version 420 // Keep it for editor detection\n" - "\n" - "/*\n" - "** Contrast, saturation, brightness\n" - "** Code of this function is from TGM's shader pack\n" - "** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057\n" - "** TGM's author comment about the license (included in the previous link)\n" - "** \"do with it, what you want! 
its total free!\n" - "** (but would be nice, if you say that you used my shaders :wink: ) but not necessary\"\n" - "*/\n" - "\n" - "struct vertex_basic\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "};\n" - "\n" - "#ifdef FRAGMENT_SHADER\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} PSin;\n" - "\n" - "#define PSin_p (PSin.p)\n" - "#define PSin_t (PSin.t)\n" - "\n" - "layout(location = 0) out vec4 SV_Target0;\n" - "\n" - "layout(std140, binding = 12) uniform cb12\n" - "{\n" - " vec4 BGColor;\n" - "};\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "\n" - "// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% \n" - "vec4 ContrastSaturationBrightness(vec4 color)\n" - "{\n" - " const float sat = SB_SATURATION / 50.0;\n" - " const float brt = SB_BRIGHTNESS / 50.0;\n" - " const float con = SB_CONTRAST / 50.0;\n" - " \n" - " // Increase or decrease these values to adjust r, g and b color channels separately\n" - " const float AvgLumR = 0.5;\n" - " const float AvgLumG = 0.5;\n" - " const float AvgLumB = 0.5;\n" - " \n" - " const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721);\n" - " \n" - " vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB);\n" - " vec3 brtColor = color.rgb * brt;\n" - " float dot_intensity = dot(brtColor, LumCoeff);\n" - " vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity);\n" - " vec3 satColor = mix(intensity, brtColor, sat);\n" - " vec3 conColor = mix(AvgLumin, satColor, con);\n" - "\n" - " color.rgb = conColor; \n" - " return color;\n" - "}\n" - "\n" - "\n" - "void ps_main()\n" - "{\n" - " vec4 c = texture(TextureSampler, PSin_t);\n" - " SV_Target0 = ContrastSaturationBrightness(c);\n" - "}\n" - "\n" - "\n" - "#endif\n" - ; - -static const char* tfx_vgs_glsl = - "//#version 420 // Keep it for text editor detection\n" - "\n" - "layout(std140, binding = 20) uniform cb20\n" - "{\n" - " vec2 VertexScale;\n" - " vec2 VertexOffset;\n" - " vec2 _removed_TextureScale;\n" - " vec2 PointSize;\n" - "};\n" - "\n" 
- "// Warning duplicated in both GLSL file\n" - "layout(std140, binding = 21) uniform cb21\n" - "{\n" - " vec3 FogColor;\n" - " float AREF;\n" - "\n" - " vec4 WH;\n" - "\n" - " vec2 TA;\n" - " float _pad0;\n" - " float Af;\n" - "\n" - " uvec4 MskFix;\n" - "\n" - " uvec4 FbMask;\n" - "\n" - " vec4 HalfTexel;\n" - "\n" - " vec4 MinMax;\n" - "\n" - " vec2 TextureScale;\n" - " vec2 TC_OffsetHack;\n" - "};\n" - "\n" - "#ifdef VERTEX_SHADER\n" - "layout(location = 0) in vec2 i_st;\n" - "layout(location = 2) in vec4 i_c;\n" - "layout(location = 3) in float i_q;\n" - "layout(location = 4) in uvec2 i_p;\n" - "layout(location = 5) in uint i_z;\n" - "layout(location = 6) in uvec2 i_uv;\n" - "layout(location = 7) in vec4 i_f;\n" - "\n" - "out SHADER\n" - "{\n" - " vec4 t_float;\n" - " vec4 t_int;\n" - " vec4 c;\n" - " flat vec4 fc;\n" - "} VSout;\n" - "\n" - "#define VSout_c (VSout.c)\n" - "#define VSout_fc (VSout.fc)\n" - "\n" - "out gl_PerVertex {\n" - " vec4 gl_Position;\n" - " float gl_PointSize;\n" - "#if !pGL_ES\n" - " float gl_ClipDistance[1];\n" - "#endif\n" - "};\n" - "\n" - "#ifdef ZERO_TO_ONE_DEPTH\n" - "const float exp_min32 = exp2(-32.0f);\n" - "#else\n" - "const float exp_min31 = exp2(-31.0f);\n" - "#endif\n" - "\n" - "void texture_coord()\n" - "{\n" - " vec2 uv = (VS_WILDHACK == 1) ? 
vec2(i_uv & uvec2(0x3FEF, 0x3FEF)) : vec2(i_uv);\n" - "\n" - " // Float coordinate\n" - " VSout.t_float.xy = i_st;\n" - " VSout.t_float.w = i_q;\n" - "\n" - " // Integer coordinate => normalized\n" - " VSout.t_int.xy = uv * TextureScale;\n" - " // Integer coordinate => integral\n" - " VSout.t_int.zw = uv;\n" - "}\n" - "\n" - "void vs_main()\n" - "{\n" - " highp uint z;\n" - " if(VS_BPPZ == 1) // 24\n" - " z = i_z & uint(0xffffff);\n" - " else if(VS_BPPZ == 2) // 16\n" - " z = i_z & uint(0xffff);\n" - " else\n" - " z = i_z;\n" - "\n" - " // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n" - " // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n" - " // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n" - " // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n" - " vec4 p;\n" - "\n" - " p.xy = vec2(i_p) - vec2(0.05f, 0.05f);\n" - " p.xy = p.xy * VertexScale - VertexOffset;\n" - " p.w = 1.0f;\n" - "#ifdef ZERO_TO_ONE_DEPTH\n" - " if(VS_LOGZ == 1) {\n" - " p.z = max(0.0f, log2(float(z))) / 32.0f;\n" - " } else {\n" - " p.z = float(z) * exp_min32;\n" - " }\n" - "#else\n" - " if(VS_LOGZ == 1) {\n" - " p.z = max(0.0f, log2(float(z))) / 31.0f - 1.0f;\n" - " } else {\n" - " p.z = float(z) * exp_min31 - 1.0f;\n" - " }\n" - "#endif\n" - "\n" - " gl_Position = p;\n" - "\n" - " texture_coord();\n" - "\n" - " VSout_c = i_c;\n" - " VSout_fc = i_c;\n" - " VSout.t_float.z = i_f.x; // pack for with texture\n" - "}\n" - "\n" - "#endif\n" - "\n" - "#ifdef GEOMETRY_SHADER\n" - "\n" - "in gl_PerVertex {\n" - " vec4 gl_Position;\n" - " float gl_PointSize;\n" - "#if !pGL_ES\n" - " float gl_ClipDistance[1];\n" - "#endif\n" - "} gl_in[];\n" - "//in int gl_PrimitiveIDIn;\n" - "\n" - "out gl_PerVertex {\n" - " vec4 gl_Position;\n" - " float gl_PointSize;\n" - "#if 
!pGL_ES\n" - " float gl_ClipDistance[1];\n" - "#endif\n" - "};\n" - "//out int gl_PrimitiveID;\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 t_float;\n" - " vec4 t_int;\n" - " vec4 c;\n" - " flat vec4 fc;\n" - "} GSin[];\n" - "\n" - "out SHADER\n" - "{\n" - " vec4 t_float;\n" - " vec4 t_int;\n" - " vec4 c;\n" - " flat vec4 fc;\n" - "} GSout;\n" - "\n" - "layout(std140, binding = 22) uniform cb22\n" - "{\n" - " vec4 rt_size;\n" - "};\n" - "\n" - "\n" - "struct vertex\n" - "{\n" - " vec4 t_float;\n" - " vec4 t_int;\n" - " vec4 c;\n" - "};\n" - "\n" - "void out_vertex(in vertex v)\n" - "{\n" - " GSout.t_float = v.t_float;\n" - " GSout.t_int = v.t_int;\n" - " GSout.c = v.c;\n" - " // Flat output\n" - "#if GS_POINT == 1\n" - " GSout.fc = GSin[0].fc;\n" - "#else\n" - " GSout.fc = GSin[1].fc;\n" - "#endif\n" - " gl_PrimitiveID = gl_PrimitiveIDIn;\n" - " EmitVertex();\n" - "}\n" - "\n" - "#if GS_POINT == 1\n" - "layout(points) in;\n" - "#else\n" - "layout(lines) in;\n" - "#endif\n" - "layout(triangle_strip, max_vertices = 6) out;\n" - "\n" - "void gs_main()\n" - "{\n" - " // left top => GSin[0];\n" - " // right bottom => GSin[1];\n" - "#if GS_POINT == 1\n" - " vertex rb = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);\n" - "#else\n" - " vertex rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);\n" - "#endif\n" - " vertex lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);\n" - "\n" - "#if GS_POINT == 1\n" - " vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);\n" - "#else\n" - " vec4 rb_p = gl_in[1].gl_Position;\n" - "#endif\n" - " vec4 lb_p = rb_p;\n" - " vec4 rt_p = rb_p;\n" - " vec4 lt_p = gl_in[0].gl_Position;\n" - "\n" - "#if GS_POINT == 0\n" - " // flat depth\n" - " lt_p.z = rb_p.z;\n" - " // flat fog and texture perspective\n" - " lt.t_float.zw = rb.t_float.zw;\n" - " // flat color\n" - " lt.c = rb.c;\n" - "#endif\n" - "\n" - " // Swap texture and position coordinate\n" - " vertex lb = rb;\n" - " lb.t_float.x = lt.t_float.x;\n" - 
" lb.t_int.x = lt.t_int.x;\n" - " lb.t_int.z = lt.t_int.z;\n" - " lb_p.x = lt_p.x;\n" - "\n" - " vertex rt = rb;\n" - " rt_p.y = lt_p.y;\n" - " rt.t_float.y = lt.t_float.y;\n" - " rt.t_int.y = lt.t_int.y;\n" - " rt.t_int.w = lt.t_int.w;\n" - "\n" - " // Triangle 1\n" - " gl_Position = lt_p;\n" - " out_vertex(lt);\n" - "\n" - " gl_Position = lb_p;\n" - " out_vertex(lb);\n" - "\n" - " gl_Position = rt_p;\n" - " out_vertex(rt);\n" - " EndPrimitive();\n" - "\n" - " // Triangle 2\n" - " gl_Position = lb_p;\n" - " out_vertex(lb);\n" - "\n" - " gl_Position = rt_p;\n" - " out_vertex(rt);\n" - "\n" - " gl_Position = rb_p;\n" - " out_vertex(rb);\n" - " EndPrimitive();\n" - "}\n" - "\n" - "#endif\n" - ; - -static const char* tfx_fs_all_glsl = - "//#version 420 // Keep it for text editor detection\n" - "\n" - "// Require for bit operation\n" - "//#extension GL_ARB_gpu_shader5 : enable\n" - "\n" - "#define FMT_32 0\n" - "#define FMT_24 1\n" - "#define FMT_16 2\n" - "\n" - "#define PS_PAL_FMT (PS_TEX_FMT >> 2)\n" - "#define PS_AEM_FMT (PS_TEX_FMT & 3)\n" - "\n" - "// APITRACE_DEBUG enables forced pixel output to easily detect\n" - "// the fragment computed by primitive\n" - "#define APITRACE_DEBUG 0\n" - "// TEX_COORD_DEBUG output the uv coordinate as color. 
It is useful\n" - "// to detect bad sampling due to upscaling\n" - "//#define TEX_COORD_DEBUG\n" - "// Just copy directly the texture coordinate\n" - "#ifdef TEX_COORD_DEBUG\n" - "#define PS_TFX 1\n" - "#define PS_TCC 1\n" - "#endif\n" - "\n" - "#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)\n" - "\n" - "#ifdef FRAGMENT_SHADER\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 t_float;\n" - " vec4 t_int;\n" - " vec4 c;\n" - " flat vec4 fc;\n" - "} PSin;\n" - "\n" - "#define PSin_c (PSin.c)\n" - "#define PSin_fc (PSin.fc)\n" - "\n" - "// Same buffer but 2 colors for dual source blending\n" - "layout(location = 0, index = 0) out vec4 SV_Target0;\n" - "layout(location = 0, index = 1) out vec4 SV_Target1;\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "layout(binding = 1) uniform sampler2D PaletteSampler;\n" - "layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the image below\n" - "\n" - "#ifndef DISABLE_GL42_image\n" - "#if PS_DATE > 0\n" - "// FIXME how to declare memory access\n" - "layout(r32i, binding = 2) uniform iimage2D img_prim_min;\n" - "// WARNING:\n" - "// You can't enable it if you discard the fragment. The depth is still\n" - "// updated (shadow in Shin Megami Tensei Nocturne)\n" - "//\n" - "// early_fragment_tests must still be enabled in the first pass of the 2 passes algo\n" - "// First pass search the first primitive that will write the bad alpha value. 
Value\n" - "// won't be written if the fragment fails the depth test.\n" - "//\n" - "// In theory the best solution will be do\n" - "// 1/ copy the depth buffer\n" - "// 2/ do the full depth (current depth writes are disabled)\n" - "// 3/ restore the depth buffer for 2nd pass\n" - "// Of course, it is likely too costly.\n" - "#if PS_DATE == 1 || PS_DATE == 2\n" - "layout(early_fragment_tests) in;\n" - "#endif\n" - "\n" - "// I don't remember why I set this parameter but it is surely useless\n" - "//layout(pixel_center_integer) in vec4 gl_FragCoord;\n" - "#endif\n" - "#else\n" - "// use basic stencil\n" - "#endif\n" - "\n" - "\n" - "// Warning duplicated in both GLSL file\n" - "layout(std140, binding = 21) uniform cb21\n" - "{\n" - " vec3 FogColor;\n" - " float AREF;\n" - "\n" - " vec4 WH;\n" - "\n" - " vec2 TA;\n" - " float _pad0;\n" - " float Af;\n" - "\n" - " uvec4 MskFix;\n" - "\n" - " uvec4 FbMask;\n" - "\n" - " vec4 HalfTexel;\n" - "\n" - " vec4 MinMax;\n" - "\n" - " vec2 TextureScale;\n" - " vec2 TC_OffsetHack;\n" - "};\n" - "\n" - "vec4 sample_c(vec2 uv)\n" - "{\n" - " return texture(TextureSampler, uv);\n" - "}\n" - "\n" - "vec4 sample_p(float idx)\n" - "{\n" - " return texture(PaletteSampler, vec2(idx, 0.0f));\n" - "}\n" - "\n" - "vec4 clamp_wrap_uv(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - "\n" - "#if PS_WMS == PS_WMT\n" - "\n" - "#if PS_WMS == 2\n" - " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" - "#elif PS_WMS == 3\n" - " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" - "#endif\n" - "\n" - "#else // PS_WMS != PS_WMT\n" - "\n" - "#if PS_WMS == 2\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" - "\n" - "#elif PS_WMS == 3\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" - "\n" - "#endif\n" - "\n" - "#if PS_WMT == 2\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" - "\n" - "#elif PS_WMT == 3\n" - "\n" - " uv_out.yw = vec2((ivec2(uv.yw 
* WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" - "#endif\n" - "\n" - "#endif\n" - "\n" - " return uv_out;\n" - "}\n" - "\n" - "mat4 sample_4c(vec4 uv)\n" - "{\n" - " mat4 c;\n" - "\n" - " // Note: texture gather can't be used because of special clamping/wrapping\n" - " // Also it doesn't support lod\n" - " c[0] = sample_c(uv.xy);\n" - " c[1] = sample_c(uv.zy);\n" - " c[2] = sample_c(uv.xw);\n" - " c[3] = sample_c(uv.zw);\n" - "\n" - " return c;\n" - "}\n" - "\n" - "vec4 sample_4_index(vec4 uv)\n" - "{\n" - " vec4 c;\n" - "\n" - " // Either GSdx will send a texture that contains a single channel\n" - " // in this case the red channel is remapped as alpha channel\n" - " //\n" - " // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n" - "\n" - " // Note: texture gather can't be used because of special clamping/wrapping\n" - " // Also it doesn't support lod\n" - " c.x = sample_c(uv.xy).a;\n" - " c.y = sample_c(uv.zy).a;\n" - " c.z = sample_c(uv.xw).a;\n" - " c.w = sample_c(uv.zw).a;\n" - "\n" - " uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n" - "\n" - "#if PS_PAL_FMT == 1\n" - " // 4HL\n" - " return vec4(i & 0xFu) / 255.0f;\n" - "\n" - "#elif PS_PAL_FMT == 2\n" - " // 4HH\n" - " return vec4(i >> 4u) / 255.0f;\n" - "\n" - "#else\n" - " // Most of texture will hit this code so keep normalized float value\n" - "\n" - " // 8 bits\n" - " return c;\n" - "#endif\n" - "\n" - "}\n" - "\n" - "mat4 sample_4p(vec4 u)\n" - "{\n" - " mat4 c;\n" - "\n" - " c[0] = sample_p(u.x);\n" - " c[1] = sample_p(u.y);\n" - " c[2] = sample_p(u.z);\n" - " c[3] = sample_p(u.w);\n" - "\n" - " return c;\n" - "}\n" - "\n" - "vec4 sample_color(vec2 st)\n" - "{\n" - "#if (PS_TCOFFSETHACK == 1)\n" - " st += TC_OffsetHack.xy;\n" - "#endif\n" - "\n" - " vec4 t;\n" - " mat4 c;\n" - " vec2 dd;\n" - "\n" - " // FIXME I'm not sure this condition is useful (I think code will be optimized)\n" - "#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 
&& PS_WMS < 2 && PS_WMT < 2)\n" - " // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n" - " c[0] = sample_c(st);\n" - "#ifdef TEX_COORD_DEBUG\n" - " c[0].rg = st.xy;\n" - "#endif\n" - "\n" - "#else\n" - " vec4 uv;\n" - "\n" - " if(PS_LTF != 0)\n" - " {\n" - " uv = st.xyxy + HalfTexel;\n" - " dd = fract(uv.xy * WH.zw);\n" - "#if (PS_FST == 0)\n" - " // Background in Shin Megami Tensei Lucifers\n" - " // I suspect that uv isn't a standard number, so fract is outside of the [0;1] range\n" - " // Note: it is free on GPU but let's do it only for float coordinate\n" - " // Strangely Dx doesn't suffer from this issue.\n" - " dd = clamp(dd, vec2(0.0f), vec2(1.0f));\n" - "#endif\n" - " }\n" - " else\n" - " {\n" - " uv = st.xyxy;\n" - " }\n" - "\n" - " uv = clamp_wrap_uv(uv);\n" - "\n" - "#if PS_PAL_FMT != 0\n" - " c = sample_4p(sample_4_index(uv));\n" - "#else\n" - " c = sample_4c(uv);\n" - "#endif\n" - "\n" - "#ifdef TEX_COORD_DEBUG\n" - " c[0].rg = uv.xy;\n" - " c[1].rg = uv.xy;\n" - " c[2].rg = uv.xy;\n" - " c[3].rg = uv.xy;\n" - "#endif\n" - "\n" - "#endif\n" - "\n" - " // PERF note: using dot product reduces by 1 the number of instruction\n" - " // but I'm not sure it is equivalent neither faster.\n" - " for (int i = 0; i < 4; i++)\n" - " {\n" - " //float sum = dot(c[i].rgb, vec3(1.0f));\n" - "#if (PS_AEM_FMT == FMT_24)\n" - " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" - "#elif (PS_AEM_FMT == FMT_16)\n" - " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? 
TA.x : 0.0f;\n" - "#endif\n" - " }\n" - "\n" - "#if(PS_LTF != 0)\n" - " t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);\n" - "#else\n" - " t = c[0];\n" - "#endif\n" - "\n" - " // The 0.05f helps to fix the overbloom of sotc\n" - " // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit)\n" - " // interpolation could be slightly below the correct one.\n" - " return trunc(t * 255.0f + 0.05f);\n" - "}\n" - "\n" - "vec4 tfx(vec4 T, vec4 C)\n" - "{\n" - " vec4 C_out;\n" - " vec4 FxT = trunc(trunc(C) * T / 128.0f);\n" - "\n" - "#if (PS_TFX == 0)\n" - " C_out = FxT;\n" - "#elif (PS_TFX == 1)\n" - " C_out = T;\n" - "#elif (PS_TFX == 2)\n" - " C_out.rgb = FxT.rgb + C.a;\n" - " C_out.a = T.a + C.a;\n" - "#elif (PS_TFX == 3)\n" - " C_out.rgb = FxT.rgb + C.a;\n" - " C_out.a = T.a;\n" - "#else\n" - " C_out = C;\n" - "#endif\n" - "\n" - "#if (PS_TCC == 0)\n" - " C_out.a = C.a;\n" - "#endif\n" - "\n" - "#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)\n" - " // Clamp only when it is useful\n" - " C_out = min(C_out, 255.0f);\n" - "#endif\n" - "\n" - " return C_out;\n" - "}\n" - "\n" - "void atst(vec4 C)\n" - "{\n" - " // FIXME use integer cmp\n" - " float a = C.a;\n" - "\n" - "#if (PS_ATST == 0) // never\n" - " discard;\n" - "#elif (PS_ATST == 1) // always\n" - " // nothing to do\n" - "#elif (PS_ATST == 2) // l\n" - " if ((AREF - a - 0.5f) < 0.0f)\n" - " discard;\n" - "#elif (PS_ATST == 3 ) // le\n" - " if ((AREF - a + 0.5f) < 0.0f)\n" - " discard;\n" - "#elif (PS_ATST == 4) // e\n" - " if ((0.5f - abs(a - AREF)) < 0.0f)\n" - " discard;\n" - "#elif (PS_ATST == 5) // ge\n" - " if ((a-AREF + 0.5f) < 0.0f)\n" - " discard;\n" - "#elif (PS_ATST == 6) // g\n" - " if ((a-AREF - 0.5f) < 0.0f)\n" - " discard;\n" - "#elif (PS_ATST == 7) // ne\n" - " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" - " discard;\n" - "#endif\n" - "}\n" - "\n" - "void fog(inout vec4 C, float f)\n" - "{\n" - "#if PS_FOG != 0\n" - " C.rgb = trunc(mix(FogColor, 
C.rgb, f));\n" - "#endif\n" - "}\n" - "\n" - "vec4 ps_color()\n" - "{\n" - " //FIXME: maybe we can set gl_Position.w = q in VS\n" - "#if (PS_FST == 0)\n" - " vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));\n" - "#else\n" - " // Note xy are normalized coordinate\n" - " vec4 T = sample_color(PSin.t_int.xy);\n" - "#endif\n" - "\n" - "#if PS_IIP == 1\n" - " vec4 C = tfx(T, PSin_c);\n" - "#else\n" - " vec4 C = tfx(T, PSin_fc);\n" - "#endif\n" - "\n" - " atst(C);\n" - "\n" - " fog(C, PSin.t_float.z);\n" - "\n" - "#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" - " C.rgb = vec3(255.0f);\n" - "#endif\n" - "\n" - " return C;\n" - "}\n" - "\n" - "void ps_fbmask(inout vec4 C)\n" - "{\n" - " // FIXME do I need special case for 16 bits\n" - "#if PS_FBMASK\n" - " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" - " C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));\n" - "#endif\n" - "}\n" - "\n" - "void ps_blend(inout vec4 Color, float As)\n" - "{\n" - "#if SW_BLEND\n" - " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" - "\n" - "#if PS_DFMT == FMT_24\n" - " float Ad = 1.0f;\n" - "#else\n" - " // FIXME FMT_16 case\n" - " // FIXME Ad or Ad * 2?\n" - " float Ad = RT.a / 128.0f;\n" - "#endif\n" - "\n" - " // Let the compiler do its jobs !\n" - " vec3 Cd = RT.rgb;\n" - " vec3 Cs = Color.rgb;\n" - "\n" - "#if PS_BLEND_A == 0\n" - " vec3 A = Cs;\n" - "#elif PS_BLEND_A == 1\n" - " vec3 A = Cd;\n" - "#else\n" - " vec3 A = vec3(0.0f);\n" - "#endif\n" - "\n" - "#if PS_BLEND_B == 0\n" - " vec3 B = Cs;\n" - "#elif PS_BLEND_B == 1\n" - " vec3 B = Cd;\n" - "#else\n" - " vec3 B = vec3(0.0f);\n" - "#endif\n" - "\n" - "#if PS_BLEND_C == 0\n" - " float C = As;\n" - "#elif PS_BLEND_C == 1\n" - " float C = Ad;\n" - "#else\n" - " float C = Af;\n" - "#endif\n" - "\n" - "#if PS_BLEND_D == 0\n" - " vec3 D = Cs;\n" - "#elif PS_BLEND_D == 1\n" - " vec3 D = Cd;\n" - "#else\n" - " vec3 D = 
vec3(0.0f);\n" - "#endif\n" - "\n" - "#if PS_BLEND_A == PS_BLEND_B\n" - " Color.rgb = D;\n" - "#else\n" - " Color.rgb = trunc((A - B) * C + D);\n" - "#endif\n" - "\n" - " // FIXME dithering\n" - "\n" - " // Correct the Color value based on the output format\n" - "#if PS_COLCLIP == 0 && PS_HDR == 0\n" - " // Standard Clamp\n" - " Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n" - "#endif\n" - "\n" - " // FIXME rouding of negative float?\n" - " // compiler uses trunc but it might need floor\n" - "\n" - " // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n" - " // GS: Color = 1, Alpha = 255 => output 1\n" - " // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875\n" - "#if PS_DFMT == FMT_16\n" - " // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n" - "\n" - " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n" - "#elif PS_COLCLIP == 1 && PS_HDR == 0\n" - " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n" - "#endif\n" - "\n" - "#endif\n" - "}\n" - "\n" - "void ps_main()\n" - "{\n" - "#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)\n" - "\n" - "#if PS_WRITE_RG == 1\n" - " // Pseudo 16 bits access.\n" - " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;\n" - "#else\n" - " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" - "#endif\n" - "\n" - "#if (PS_DATE & 3) == 1\n" - " // DATM == 0: Pixel with alpha equal to 1 will failed\n" - " bool bad = (127.5f / 255.0f) < rt_a;\n" - "#elif (PS_DATE & 3) == 2\n" - " // DATM == 1: Pixel with alpha equal to 0 will failed\n" - " bool bad = rt_a < (127.5f / 255.0f);\n" - "#endif\n" - "\n" - " if (bad) {\n" - "#if PS_DATE >= 5 || defined(DISABLE_GL42_image)\n" - " discard;\n" - "#else\n" - " imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n" - " return;\n" - "#endif\n" - " }\n" - "\n" - "#endif\n" - "\n" - "#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n" - " int 
stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n" - " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" - " // the bad alpha value so we must keep it.\n" - "\n" - " if (gl_PrimitiveID > stencil_ceil) {\n" - " discard;\n" - " }\n" - "#endif\n" - "\n" - " vec4 C = ps_color();\n" - "#if (APITRACE_DEBUG & 1) == 1\n" - " C.r = 255f;\n" - "#endif\n" - "#if (APITRACE_DEBUG & 2) == 2\n" - " C.g = 255f;\n" - "#endif\n" - "#if (APITRACE_DEBUG & 4) == 4\n" - " C.b = 255f;\n" - "#endif\n" - "#if (APITRACE_DEBUG & 8) == 8\n" - " C.a = 128f;\n" - "#endif\n" - "\n" - "#if PS_SHUFFLE\n" - " uvec4 denorm_c = uvec4(C);\n" - " uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n" - "\n" - " // Write RB part. Mask will take care of the correct destination\n" - "#if PS_READ_BA\n" - " C.rb = C.bb;\n" - "#else\n" - " C.rb = C.rr;\n" - "#endif\n" - "\n" - " // FIXME precompute my_TA & 0x80\n" - "\n" - " // Write GA part. Mask will take care of the correct destination\n" - " // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n" - " // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n" - "#if PS_READ_BA\n" - " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" - " // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? 
denorm_TA.y : denorm_TA.x;\n" - " // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n" - " // c.ga = vec2(float(denorm_c.a));\n" - "\n" - " if (bool(denorm_c.a & 0x80u))\n" - " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" - " else\n" - " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" - "\n" - "#else\n" - " if (bool(denorm_c.g & 0x80u))\n" - " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" - " else\n" - " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" - "\n" - " // Nice idea but step/mix requires 4 instructions\n" - " // set / trunc / I2F / Mad\n" - " //\n" - " // float sel = step(128.0f, c.g);\n" - " // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n" - " // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n" - "#endif\n" - "\n" - "#endif\n" - "\n" - " // Must be done before alpha correction\n" - " float alpha_blend = C.a / 128.0f;\n" - "\n" - " // Correct the ALPHA value based on the output format\n" - "#if (PS_DFMT == FMT_16)\n" - " float A_one = 128.0f; // alpha output will be 0x80\n" - " C.a = (PS_FBA != 0) ? 
A_one : step(128.0f, C.a) * A_one;\n" - "#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n" - " if(C.a < 128.0f) C.a += 128.0f;\n" - "#endif\n" - "\n" - " // Get first primitive that will write a failling alpha value\n" - "#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n" - " // DATM == 0\n" - " // Pixel with alpha equal to 1 will failed (128-255)\n" - " if (C.a > 127.5f) {\n" - " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" - " }\n" - " return;\n" - "#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n" - " // DATM == 1\n" - " // Pixel with alpha equal to 0 will failed (0-127)\n" - " if (C.a < 127.5f) {\n" - " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" - " }\n" - " return;\n" - "#endif\n" - "\n" - " ps_blend(C, alpha_blend);\n" - "\n" - " ps_fbmask(C);\n" - "\n" - "#if PS_HDR == 1\n" - " // Use negative value to avoid overflow of the texture (in accumulation mode)\n" - " // Note: code were initially done for an Half-Float texture. Due to overflow\n" - " // the texture was upgraded to a full float. Maybe this code is useless now!\n" - " // Good testcase is castlevania\n" - " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" - " C.rgb = (C.rgb - 256.0f);\n" - " }\n" - "#endif\n" - " SV_Target0 = C / 255.0f;\n" - " SV_Target1 = vec4(alpha_blend);\n" - "}\n" - "\n" - "#endif\n" - ; - -static const char* fxaa_fx = - "#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130)\n" - "\n" - "#ifndef FXAA_GLSL_130\n" - " #define FXAA_GLSL_130 0\n" - "#endif\n" - "\n" - "#define UHQ_FXAA 1 //High Quality Fast Approximate Anti Aliasing. Adapted for GSdx from Timothy Lottes FXAA 3.11.\n" - "#define FxaaSubpixMax 0.0 //[0.00 to 1.00] Amount of subpixel aliasing removal. 0.00: Edge only antialiasing (no blurring)\n" - "#define FxaaEarlyExit 1 //[0 or 1] Use Fxaa early exit pathing. When disabled, the entire scene is antialiased(FSAA). 
0 is off, 1 is on.\n" - "\n" - "/*------------------------------------------------------------------------------\n" - " [GLOBALS|FUNCTIONS]\n" - "------------------------------------------------------------------------------*/\n" - "#if (FXAA_GLSL_130 == 1)\n" - "\n" - "struct vertex_basic\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "};\n" - "\n" - "layout(binding = 0) uniform sampler2D TextureSampler;\n" - "\n" - "in SHADER\n" - "{\n" - " vec4 p;\n" - " vec2 t;\n" - "} PSin;\n" - "\n" - "layout(location = 0) out vec4 SV_Target0;\n" - "\n" - "#else\n" - "\n" - "#if (SHADER_MODEL >= 0x400)\n" - "Texture2D Texture : register(t0);\n" - "SamplerState TextureSampler : register(s0);\n" - "#else\n" - "texture2D Texture : register(t0);\n" - "sampler2D TextureSampler : register(s0);\n" - "#define SamplerState sampler2D\n" - "#endif\n" - "\n" - "cbuffer cb0\n" - "{\n" - " float4 _rcpFrame : register(c0);\n" - "};\n" - "\n" - "struct VS_INPUT\n" - "{\n" - " float4 p : POSITION;\n" - " float2 t : TEXCOORD0;\n" - "};\n" - "\n" - "struct VS_OUTPUT\n" - "{\n" - " #if (SHADER_MODEL >= 0x400)\n" - " float4 p : SV_Position;\n" - " #else\n" - " float4 p : TEXCOORD1;\n" - " #endif\n" - " float2 t : TEXCOORD0;\n" - "};\n" - "\n" - "struct PS_OUTPUT\n" - "{\n" - " #if (SHADER_MODEL >= 0x400)\n" - " float4 c : SV_Target0;\n" - " #else\n" - " float4 c : COLOR0;\n" - " #endif\n" - "};\n" - "\n" - "#endif\n" - "\n" - "/*------------------------------------------------------------------------------\n" - " [FXAA CODE SECTION]\n" - "------------------------------------------------------------------------------*/\n" - "\n" - "#if (SHADER_MODEL >= 0x500)\n" - "#define FXAA_HLSL_5 1\n" - "#define FXAA_GATHER4_ALPHA 1\n" - "#elif (SHADER_MODEL >= 0x400)\n" - "#define FXAA_HLSL_4 1\n" - "#define FXAA_GATHER4_ALPHA 0\n" - "#elif (FXAA_GLSL_130 == 1)\n" - "#define FXAA_GATHER4_ALPHA 1\n" - "#else\n" - "#define FXAA_HLSL_3 1\n" - "#define FXAA_GATHER4_ALPHA 0\n" - "#endif\n" - "\n" - "#if (FXAA_HLSL_5 
== 1)\n" - "struct FxaaTex { SamplerState smpl; Texture2D tex; };\n" - "#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n" - "#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n" - "#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)\n" - "#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)\n" - "#define FxaaDiscard clip(-1)\n" - "#define FxaaSat(x) saturate(x)\n" - "\n" - "#elif (FXAA_HLSL_4 == 1)\n" - "struct FxaaTex { SamplerState smpl; Texture2D tex; };\n" - "#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n" - "#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n" - "#define FxaaDiscard clip(-1)\n" - "#define FxaaSat(x) saturate(x)\n" - "\n" - "#elif (FXAA_HLSL_3 == 1)\n" - "#define FxaaTex sampler2D\n" - "#define int2 float2\n" - "#define FxaaSat(x) saturate(x)\n" - "#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))\n" - "#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))\n" - "\n" - "#elif (FXAA_GLSL_130 == 1)\n" - "\n" - "#define int2 ivec2\n" - "#define float2 vec2\n" - "#define float3 vec3\n" - "#define float4 vec4\n" - "#define FxaaDiscard discard\n" - "#define FxaaSat(x) clamp(x, 0.0, 1.0)\n" - "#define FxaaTex sampler2D\n" - "#define FxaaTexTop(t, p) textureLod(t, p, 0.0)\n" - "#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)\n" - "#if (FXAA_GATHER4_ALPHA == 1)\n" - "// use #extension GL_ARB_gpu_shader5 : enable\n" - "#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)\n" - "#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)\n" - "#endif\n" - "\n" - "#endif\n" - "\n" - "#define FxaaEdgeThreshold 0.063\n" - "#define FxaaEdgeThresholdMin 0.00\n" - "#define FXAA_QUALITY__P0 1.0\n" - "#define FXAA_QUALITY__P1 1.5\n" - "#define FXAA_QUALITY__P2 2.0\n" - "#define FXAA_QUALITY__P3 2.0\n" - "#define FXAA_QUALITY__P4 2.0\n" - "#define FXAA_QUALITY__P5 2.0\n" - "#define FXAA_QUALITY__P6 2.0\n" - "#define FXAA_QUALITY__P7 2.0\n" - 
"#define FXAA_QUALITY__P8 2.0\n" - "#define FXAA_QUALITY__P9 2.0\n" - "#define FXAA_QUALITY__P10 4.0\n" - "#define FXAA_QUALITY__P11 8.0\n" - "#define FXAA_QUALITY__P12 8.0\n" - "\n" - "/*------------------------------------------------------------------------------\n" - " [GAMMA PREPASS CODE SECTION]\n" - "------------------------------------------------------------------------------*/\n" - "float RGBLuminance(float3 color)\n" - "{\n" - " const float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);\n" - " return dot(color.rgb, lumCoeff);\n" - "}\n" - "\n" - "#if (FXAA_GLSL_130 == 0)\n" - "#define PixelSize float2(_rcpFrame.x, _rcpFrame.y)\n" - "#endif\n" - "\n" - "\n" - "float3 RGBGammaToLinear(float3 color, float gamma)\n" - "{\n" - " color = FxaaSat(color);\n" - " color.r = (color.r <= 0.0404482362771082) ?\n" - " color.r / 12.92 : pow((color.r + 0.055) / 1.055, gamma);\n" - " color.g = (color.g <= 0.0404482362771082) ?\n" - " color.g / 12.92 : pow((color.g + 0.055) / 1.055, gamma);\n" - " color.b = (color.b <= 0.0404482362771082) ?\n" - " color.b / 12.92 : pow((color.b + 0.055) / 1.055, gamma);\n" - "\n" - " return color;\n" - "}\n" - "\n" - "float3 LinearToRGBGamma(float3 color, float gamma)\n" - "{\n" - " color = FxaaSat(color);\n" - " color.r = (color.r <= 0.00313066844250063) ?\n" - " color.r * 12.92 : 1.055 * pow(color.r, 1.0 / gamma) - 0.055;\n" - " color.g = (color.g <= 0.00313066844250063) ?\n" - " color.g * 12.92 : 1.055 * pow(color.g, 1.0 / gamma) - 0.055;\n" - " color.b = (color.b <= 0.00313066844250063) ?\n" - " color.b * 12.92 : 1.055 * pow(color.b, 1.0 / gamma) - 0.055;\n" - "\n" - " return color;\n" - "}\n" - "\n" - "float4 PreGammaPass(float4 color, float2 uv0)\n" - "{\n" - " #if (SHADER_MODEL >= 0x400)\n" - " color = Texture.Sample(TextureSampler, uv0);\n" - " #elif (FXAA_GLSL_130 == 1)\n" - " color = texture(TextureSampler, uv0);\n" - " #else\n" - " color = tex2D(TextureSampler, uv0);\n" - " #endif\n" - "\n" - " const float GammaConst = 
2.233;\n" - " color.rgb = RGBGammaToLinear(color.rgb, GammaConst);\n" - " color.rgb = LinearToRGBGamma(color.rgb, GammaConst);\n" - " color.a = RGBLuminance(color.rgb);\n" - "\n" - " return color;\n" - "}\n" - "\n" - "\n" - "/*------------------------------------------------------------------------------\n" - " [FXAA CODE SECTION]\n" - "------------------------------------------------------------------------------*/\n" - "\n" - "float FxaaLuma(float4 rgba)\n" - "{ \n" - " rgba.w = RGBLuminance(rgba.xyz);\n" - " return rgba.w; \n" - "}\n" - "\n" - "float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaSubpix, float fxaaEdgeThreshold, float fxaaEdgeThresholdMin)\n" - "{\n" - " float2 posM;\n" - " posM.x = pos.x;\n" - " posM.y = pos.y;\n" - "\n" - " #if (FXAA_GATHER4_ALPHA == 1)\n" - " float4 rgbyM = FxaaTexTop(tex, posM);\n" - " float4 luma4A = FxaaTexAlpha4(tex, posM);\n" - " float4 luma4B = FxaaTexOffAlpha4(tex, posM, int2(-1, -1));\n" - " rgbyM.w = RGBLuminance(rgbyM.xyz);\n" - "\n" - " #define lumaM rgbyM.w\n" - " #define lumaE luma4A.z\n" - " #define lumaS luma4A.x\n" - " #define lumaSE luma4A.y\n" - " #define lumaNW luma4B.w\n" - " #define lumaN luma4B.z\n" - " #define lumaW luma4B.x\n" - " \n" - " #else\n" - " float4 rgbyM = FxaaTexTop(tex, posM);\n" - " rgbyM.w = RGBLuminance(rgbyM.xyz);\n" - " #define lumaM rgbyM.w\n" - "\n" - " float lumaS = FxaaLuma(FxaaTexOff(tex, posM, int2( 0, 1), fxaaRcpFrame.xy));\n" - " float lumaE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 0), fxaaRcpFrame.xy));\n" - " float lumaN = FxaaLuma(FxaaTexOff(tex, posM, int2( 0,-1), fxaaRcpFrame.xy));\n" - " float lumaW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 0), fxaaRcpFrame.xy));\n" - " #endif\n" - "\n" - " float maxSM = max(lumaS, lumaM);\n" - " float minSM = min(lumaS, lumaM);\n" - " float maxESM = max(lumaE, maxSM);\n" - " float minESM = min(lumaE, minSM);\n" - " float maxWN = max(lumaN, lumaW);\n" - " float minWN = min(lumaN, lumaW);\n" - "\n" - " float 
rangeMax = max(maxWN, maxESM);\n" - " float rangeMin = min(minWN, minESM);\n" - " float range = rangeMax - rangeMin;\n" - " float rangeMaxScaled = rangeMax * fxaaEdgeThreshold;\n" - " float rangeMaxClamped = max(fxaaEdgeThresholdMin, rangeMaxScaled);\n" - "\n" - " bool earlyExit = range < rangeMaxClamped;\n" - " #if (FxaaEarlyExit == 1)\n" - " if(earlyExit) { return rgbyM; }\n" - " #endif\n" - "\n" - " #if (FXAA_GATHER4_ALPHA == 0)\n" - " float lumaNW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1,-1), fxaaRcpFrame.xy));\n" - " float lumaSE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 1), fxaaRcpFrame.xy));\n" - " float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n" - " float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n" - " #else\n" - " float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n" - " float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n" - " #endif\n" - "\n" - " float lumaNS = lumaN + lumaS;\n" - " float lumaWE = lumaW + lumaE;\n" - " float subpixRcpRange = 1.0/range;\n" - " float subpixNSWE = lumaNS + lumaWE;\n" - " float edgeHorz1 = (-2.0 * lumaM) + lumaNS;\n" - " float edgeVert1 = (-2.0 * lumaM) + lumaWE;\n" - " float lumaNESE = lumaNE + lumaSE;\n" - " float lumaNWNE = lumaNW + lumaNE;\n" - " float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;\n" - " float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;\n" - "\n" - " float lumaNWSW = lumaNW + lumaSW;\n" - " float lumaSWSE = lumaSW + lumaSE;\n" - " float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);\n" - " float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);\n" - " float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;\n" - " float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;\n" - " float edgeHorz = abs(edgeHorz3) + edgeHorz4;\n" - " float edgeVert = abs(edgeVert3) + edgeVert4;\n" - "\n" - " float subpixNWSWNESE = lumaNWSW + lumaNESE;\n" - " float lengthSign = fxaaRcpFrame.x;\n" - " bool horzSpan = edgeHorz >= 
edgeVert;\n" - " float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;\n" - " if(!horzSpan) lumaN = lumaW;\n" - " if(!horzSpan) lumaS = lumaE;\n" - " if(horzSpan) lengthSign = fxaaRcpFrame.y;\n" - " float subpixB = (subpixA * (1.0/12.0)) - lumaM;\n" - "\n" - " float gradientN = lumaN - lumaM;\n" - " float gradientS = lumaS - lumaM;\n" - " float lumaNN = lumaN + lumaM;\n" - " float lumaSS = lumaS + lumaM;\n" - " bool pairN = abs(gradientN) >= abs(gradientS);\n" - " float gradient = max(abs(gradientN), abs(gradientS));\n" - " if(pairN) lengthSign = -lengthSign;\n" - " float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);\n" - "\n" - " float2 posB;\n" - " posB.x = posM.x;\n" - " posB.y = posM.y;\n" - " float2 offNP;\n" - " offNP.x = (!horzSpan) ? 0.0 : fxaaRcpFrame.x;\n" - " offNP.y = ( horzSpan) ? 0.0 : fxaaRcpFrame.y;\n" - " if(!horzSpan) posB.x += lengthSign * 0.5;\n" - " if( horzSpan) posB.y += lengthSign * 0.5;\n" - "\n" - " float2 posN;\n" - " posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;\n" - " posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;\n" - " float2 posP;\n" - " posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;\n" - " posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;\n" - " float subpixD = ((-2.0)*subpixC) + 3.0;\n" - " float lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));\n" - " float subpixE = subpixC * subpixC;\n" - " float lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));\n" - "\n" - " if(!pairN) lumaNN = lumaSS;\n" - " float gradientScaled = gradient * 1.0/4.0;\n" - " float lumaMM = lumaM - lumaNN * 0.5;\n" - " float subpixF = subpixD * subpixE;\n" - " bool lumaMLTZero = lumaMM < 0.0;\n" - " lumaEndN -= lumaNN * 0.5;\n" - " lumaEndP -= lumaNN * 0.5;\n" - " bool doneN = abs(lumaEndN) >= gradientScaled;\n" - " bool doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;\n" - " bool doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;\n" - " 
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, 
posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 
0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = 
abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;\n" - "\n" - " if(doneNP) {\n" - " if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n" - " if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n" - " if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n" - " if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n" - " doneN = abs(lumaEndN) >= gradientScaled;\n" - " doneP = abs(lumaEndP) >= gradientScaled;\n" - " if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;\n" - " if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;\n" - " doneNP = (!doneN) || (!doneP);\n" - " if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;\n" - " if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;\n" - " }}}}}}}}}}}\n" - "\n" - " float dstN = posM.x - posN.x;\n" - " float dstP = posP.x - posM.x;\n" - " if(!horzSpan) dstN = posM.y - posN.y;\n" - " if(!horzSpan) dstP = posP.y - posM.y;\n" - "\n" - " bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;\n" - " float spanLength = (dstP + dstN);\n" - " bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;\n" - " float spanLengthRcp = 1.0/spanLength;\n" - "\n" - " bool directionN = dstN < dstP;\n" - " float dst = min(dstN, dstP);\n" - " bool goodSpan = directionN ? goodSpanN : goodSpanP;\n" - " float subpixG = subpixF * subpixF;\n" - " float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;\n" - " float subpixH = subpixG * fxaaSubpix;\n" - "\n" - " float pixelOffsetGood = goodSpan ? 
pixelOffset : 0.0;\n" - " float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);\n" - " if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;\n" - " if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;\n" - "\n" - " return float4(FxaaTexTop(tex, posM).xyz, lumaM);\n" - "}\n" - "\n" - "#if (FXAA_GLSL_130 == 1)\n" - "float4 FxaaPass(float4 FxaaColor, float2 uv0)\n" - "#else\n" - "float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)\n" - "#endif\n" - "{\n" - "\n" - " #if (SHADER_MODEL >= 0x400)\n" - " FxaaTex tex;\n" - " tex.tex = Texture;\n" - " tex.smpl = TextureSampler;\n" - "\n" - " Texture.GetDimensions(PixelSize.x, PixelSize.y);\n" - " FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n" - "\n" - " #elif (FXAA_GLSL_130 == 1)\n" - "\n" - " vec2 PixelSize = textureSize(TextureSampler, 0);\n" - " FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n" - "\n" - " #else\n" - " FxaaTex tex;\n" - " tex = TextureSampler;\n" - " FxaaColor = FxaaPixelShader(uv0, tex, PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n" - " #endif\n" - "\n" - " return FxaaColor;\n" - "}\n" - "\n" - "/*------------------------------------------------------------------------------\n" - " [MAIN() & COMBINE PASS CODE SECTION]\n" - "------------------------------------------------------------------------------*/\n" - "#if (FXAA_GLSL_130 == 1)\n" - "\n" - "void ps_main()\n" - "{\n" - " vec4 color = texture(TextureSampler, PSin.t);\n" - " color = PreGammaPass(color, PSin.t);\n" - " color = FxaaPass(color, PSin.t);\n" - "\n" - " SV_Target0 = color;\n" - "}\n" - "\n" - "#else\n" - "\n" - "PS_OUTPUT ps_main(VS_OUTPUT input)\n" - "{\n" - " PS_OUTPUT output;\n" - "\n" - " #if (SHADER_MODEL >= 0x400)\n" - " float4 color = Texture.Sample(TextureSampler, input.t);\n" - "\n" - " color = PreGammaPass(color, input.t);\n" - " color = 
FxaaPass(color, input.t);\n" - " #else\n" - " float4 color = tex2D(TextureSampler, input.t);\n" - "\n" - " color = PreGammaPass(color, input.t);\n" - " color = FxaaPass(color, input.t);\n" - " #endif\n" - "\n" - " output.c = color;\n" - " \n" - " return output;\n" - "}\n" - "\n" - "#endif\n" - "\n" - "#endif\n" - ; diff --git a/plugins/GSdx_legacy/res/interlace.fx b/plugins/GSdx_legacy/res/interlace.fx deleted file mode 100644 index 9e6c76b02e..0000000000 --- a/plugins/GSdx_legacy/res/interlace.fx +++ /dev/null @@ -1,87 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency - -#if SHADER_MODEL >= 0x400 - -Texture2D Texture; -SamplerState Sampler; - -cbuffer cb0 -{ - float2 ZrH; - float hH; -}; - -struct PS_INPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -float4 ps_main0(PS_INPUT input) : SV_Target0 -{ - clip(frac(input.t.y * hH) - 0.5); - - return Texture.Sample(Sampler, input.t); -} - -float4 ps_main1(PS_INPUT input) : SV_Target0 -{ - clip(0.5 - frac(input.t.y * hH)); - - return Texture.Sample(Sampler, input.t); -} - -float4 ps_main2(PS_INPUT input) : SV_Target0 -{ - float4 c0 = Texture.Sample(Sampler, input.t - ZrH); - float4 c1 = Texture.Sample(Sampler, input.t); - float4 c2 = Texture.Sample(Sampler, input.t + ZrH); - - return (c0 + c1 * 2 + c2) / 4; -} - -float4 ps_main3(PS_INPUT input) : SV_Target0 -{ - return Texture.Sample(Sampler, input.t); -} - -#elif SHADER_MODEL <= 0x300 - -sampler s0 : register(s0); - -float4 Params1 : register(c0); - -#define ZrH (Params1.xy) -#define hH (Params1.z) - -float4 ps_main0(float2 tex : TEXCOORD0) : COLOR -{ - clip(frac(tex.y * hH) - 0.5); - - return tex2D(s0, tex); -} - -float4 ps_main1(float2 tex : TEXCOORD0) : COLOR -{ - clip(0.5 - frac(tex.y * hH)); - - return tex2D(s0, tex); -} - -float4 ps_main2(float2 tex : TEXCOORD0) : COLOR -{ - float4 c0 = tex2D(s0, tex - ZrH); - float4 c1 = tex2D(s0, tex); - float4 c2 = tex2D(s0, tex + ZrH); - - return (c0 + c1 * 2 + c2) / 4; 
-} - -float4 ps_main3(float2 tex : TEXCOORD0) : COLOR -{ - return tex2D(s0, tex); -} - -#endif - -#endif diff --git a/plugins/GSdx_legacy/res/logo-ogl.bmp b/plugins/GSdx_legacy/res/logo-ogl.bmp deleted file mode 100644 index 03b077f322..0000000000 Binary files a/plugins/GSdx_legacy/res/logo-ogl.bmp and /dev/null differ diff --git a/plugins/GSdx_legacy/res/logo10.bmp b/plugins/GSdx_legacy/res/logo10.bmp deleted file mode 100644 index 25f6b177c7..0000000000 Binary files a/plugins/GSdx_legacy/res/logo10.bmp and /dev/null differ diff --git a/plugins/GSdx_legacy/res/logo9.bmp b/plugins/GSdx_legacy/res/logo9.bmp deleted file mode 100644 index 24cfdbeea8..0000000000 Binary files a/plugins/GSdx_legacy/res/logo9.bmp and /dev/null differ diff --git a/plugins/GSdx_legacy/res/merge.fx b/plugins/GSdx_legacy/res/merge.fx deleted file mode 100644 index 4d8236b11c..0000000000 --- a/plugins/GSdx_legacy/res/merge.fx +++ /dev/null @@ -1,60 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency -#if SHADER_MODEL >= 0x400 - -Texture2D Texture; -SamplerState Sampler; - -cbuffer cb0 -{ - float4 BGColor; -}; - -struct PS_INPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -float4 ps_main0(PS_INPUT input) : SV_Target0 -{ - float4 c = Texture.Sample(Sampler, input.t); - c.a = min(c.a * 2, 1); - return c; -} - -float4 ps_main1(PS_INPUT input) : SV_Target0 -{ - float4 c = Texture.Sample(Sampler, input.t); - c.a = BGColor.a; - return c; -} - -#elif SHADER_MODEL <= 0x300 - -sampler Texture : register(s0); - -float4 g_params[1]; - -#define BGColor (g_params[0]) - -struct PS_INPUT -{ - float2 t : TEXCOORD0; -}; - -float4 ps_main0(PS_INPUT input) : COLOR -{ - float4 c = tex2D(Texture, input.t); - // a = ; - return c.bgra; -} - -float4 ps_main1(PS_INPUT input) : COLOR -{ - float4 c = tex2D(Texture, input.t); - c.a = BGColor.a; - return c.bgra; -} - -#endif -#endif diff --git a/plugins/GSdx_legacy/res/shadeboost.fx 
b/plugins/GSdx_legacy/res/shadeboost.fx deleted file mode 100644 index 8a30067440..0000000000 --- a/plugins/GSdx_legacy/res/shadeboost.fx +++ /dev/null @@ -1,76 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency - -/* -** Contrast, saturation, brightness -** Code of this function is from TGM's shader pack -** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 -*/ - -// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% -float4 ContrastSaturationBrightness(float4 color) // Ported to HLSL -{ - const float sat = SB_SATURATION / 50.0; - const float brt = SB_BRIGHTNESS / 50.0; - const float con = SB_CONTRAST / 50.0; - - // Increase or decrease these values to adjust r, g and b color channels separately - const float AvgLumR = 0.5; - const float AvgLumG = 0.5; - const float AvgLumB = 0.5; - - const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721); - - float3 AvgLumin = float3(AvgLumR, AvgLumG, AvgLumB); - float3 brtColor = color.rgb * brt; - float3 intensity = dot(brtColor, LumCoeff); - float3 satColor = lerp(intensity, brtColor, sat); - float3 conColor = lerp(AvgLumin, satColor, con); - - color.rgb = conColor; - return color; -} - -#if SHADER_MODEL >= 0x400 - -Texture2D Texture; -SamplerState Sampler; - -cbuffer cb0 -{ - float4 BGColor; -}; - -struct PS_INPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -float4 ps_main(PS_INPUT input) : SV_Target0 -{ - float4 c = Texture.Sample(Sampler, input.t); - return ContrastSaturationBrightness(c); -} - - -#elif SHADER_MODEL <= 0x300 - -sampler Texture : register(s0); - -float4 g_params[1]; - -#define BGColor (g_params[0]) - -struct PS_INPUT -{ - float2 t : TEXCOORD0; -}; - -float4 ps_main(PS_INPUT input) : COLOR -{ - float4 c = tex2D(Texture, input.t); - return ContrastSaturationBrightness(c); -} - -#endif -#endif diff --git a/plugins/GSdx_legacy/res/tfx.cl b/plugins/GSdx_legacy/res/tfx.cl deleted file mode 100644 index 91bebeed11..0000000000 --- 
a/plugins/GSdx_legacy/res/tfx.cl +++ /dev/null @@ -1,1629 +0,0 @@ -#if defined(CL_VERSION_2_0) - -#error hello - -#endif - -#if defined(CL_VERSION_1_1) || defined(CL_VERSION_1_2) // make safe to include in resource file to enforce dependency - -#ifdef cl_amd_printf -#pragma OPENCL EXTENSION cl_amd_printf : enable -#endif - -#ifdef cl_amd_media_ops -#pragma OPENCL EXTENSION cl_amd_media_ops : enable -#else -#endif - -#ifdef cl_amd_media_ops2 -#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable -#else -#endif - -#ifndef CL_FLT_EPSILON -#define CL_FLT_EPSILON 1.1920928955078125e-7f -#endif - -#if MAX_PRIM_PER_BATCH == 64u - #define BIN_TYPE ulong -#elif MAX_PRIM_PER_BATCH == 32u - #define BIN_TYPE uint -#else - #error "MAX_PRIM_PER_BATCH != 32u OR 64u" -#endif - -#define TFX_ABA(sel) ((sel.x >> 24) & 3) -#define TFX_ABB(sel) ((sel.x >> 26) & 3) -#define TFX_ABC(sel) ((sel.x >> 28) & 3) -#define TFX_ABD(sel) ((sel.x >> 30) & 3) -#define TFX_WMS(sel) ((sel.y >> 8) & 3) -#define TFX_WMT(sel) ((sel.y >> 10) & 3) - -typedef struct -{ - union {float4 p; struct {float x, y; uint z, f;};}; - union {float4 tc; struct {float s, t, q; uchar4 c;};}; -} gs_vertex; - -typedef struct -{ - gs_vertex v[3]; - uint zmin, zmax; - uint pb_index; - uint _pad; -} gs_prim; - -typedef struct -{ - float4 dx, dy; - float4 zero; - float4 reject_corner; -} gs_barycentric; - -typedef struct -{ - struct {uint first, last;} bounds[MAX_BIN_PER_BATCH]; - BIN_TYPE bin[MAX_BIN_COUNT]; - uchar4 bbox[MAX_PRIM_COUNT]; - gs_prim prim[MAX_PRIM_COUNT]; - gs_barycentric barycentric[MAX_PRIM_COUNT]; -} gs_env; - -typedef struct -{ - int4 scissor; - char dimx[4][4]; - uint2 sel; - int fbp, zbp, bw; - uint fm, zm; - uchar4 fog; // rgb - uchar aref, afix; - uchar ta0, ta1; - int tbp[7], tbw[7]; - int minu, maxu, minv, maxv; - int lod; // lcm == 1 - int mxl; - float l; // TEX1.L * -0x10000 - float k; // TEX1.K * 0x10000 - uchar4 clut[256]; // TODO: this could be an index to a separate buffer, it may be the same 
across several gs_params following eachother -} gs_param; - -enum GS_PRIM_CLASS -{ - GS_POINT_CLASS, - GS_LINE_CLASS, - GS_TRIANGLE_CLASS, - GS_SPRITE_CLASS -}; - -enum GS_PSM -{ - PSM_PSMCT32, - PSM_PSMCT24, - PSM_PSMCT16, - PSM_PSMCT16S, - PSM_PSMZ32, - PSM_PSMZ24, - PSM_PSMZ16, - PSM_PSMZ16S, - PSM_PSMT8, - PSM_PSMT4, - PSM_PSMT8H, - PSM_PSMT4HL, - PSM_PSMT4HH, -}; - -enum GS_TFX -{ - TFX_MODULATE = 0, - TFX_DECAL = 1, - TFX_HIGHLIGHT = 2, - TFX_HIGHLIGHT2 = 3, - TFX_NONE = 4, -}; - -enum GS_CLAMP -{ - CLAMP_REGION_REPEAT = 0, - CLAMP_REPEAT = 1, - CLAMP_CLAMP = 2, - CLAMP_REGION_CLAMP = 3, -}; - -enum GS_ZTST -{ - ZTST_NEVER = 0, - ZTST_ALWAYS = 1, - ZTST_GEQUAL = 2, - ZTST_GREATER = 3, -}; - -enum GS_ATST -{ - ATST_NEVER = 0, - ATST_ALWAYS = 1, - ATST_LESS = 2, - ATST_LEQUAL = 3, - ATST_EQUAL = 4, - ATST_GEQUAL = 5, - ATST_GREATER = 6, - ATST_NOTEQUAL = 7, -}; - -enum GS_AFAIL -{ - AFAIL_KEEP = 0, - AFAIL_FB_ONLY = 1, - AFAIL_ZB_ONLY = 2, - AFAIL_RGB_ONLY = 3, -}; - -__constant uchar blockTable32[4][8] = -{ - { 0, 1, 4, 5, 16, 17, 20, 21}, - { 2, 3, 6, 7, 18, 19, 22, 23}, - { 8, 9, 12, 13, 24, 25, 28, 29}, - { 10, 11, 14, 15, 26, 27, 30, 31} -}; - -__constant uchar blockTable32Z[4][8] = -{ - { 24, 25, 28, 29, 8, 9, 12, 13}, - { 26, 27, 30, 31, 10, 11, 14, 15}, - { 16, 17, 20, 21, 0, 1, 4, 5}, - { 18, 19, 22, 23, 2, 3, 6, 7} -}; - -__constant uchar blockTable16[8][4] = -{ - { 0, 2, 8, 10 }, - { 1, 3, 9, 11 }, - { 4, 6, 12, 14 }, - { 5, 7, 13, 15 }, - { 16, 18, 24, 26 }, - { 17, 19, 25, 27 }, - { 20, 22, 28, 30 }, - { 21, 23, 29, 31 } -}; - -__constant uchar blockTable16S[8][4] = -{ - { 0, 2, 16, 18 }, - { 1, 3, 17, 19 }, - { 8, 10, 24, 26 }, - { 9, 11, 25, 27 }, - { 4, 6, 20, 22 }, - { 5, 7, 21, 23 }, - { 12, 14, 28, 30 }, - { 13, 15, 29, 31 } -}; - -__constant uchar blockTable16Z[8][4] = -{ - { 24, 26, 16, 18 }, - { 25, 27, 17, 19 }, - { 28, 30, 20, 22 }, - { 29, 31, 21, 23 }, - { 8, 10, 0, 2 }, - { 9, 11, 1, 3 }, - { 12, 14, 4, 6 }, - { 13, 15, 5, 7 } -}; - 
-__constant uchar blockTable16SZ[8][4] = -{ - { 24, 26, 8, 10 }, - { 25, 27, 9, 11 }, - { 16, 18, 0, 2 }, - { 17, 19, 1, 3 }, - { 28, 30, 12, 14 }, - { 29, 31, 13, 15 }, - { 20, 22, 4, 6 }, - { 21, 23, 5, 7 } -}; - -__constant uchar blockTable8[4][8] = -{ - { 0, 1, 4, 5, 16, 17, 20, 21}, - { 2, 3, 6, 7, 18, 19, 22, 23}, - { 8, 9, 12, 13, 24, 25, 28, 29}, - { 10, 11, 14, 15, 26, 27, 30, 31} -}; - -__constant uchar blockTable4[8][4] = -{ - { 0, 2, 8, 10 }, - { 1, 3, 9, 11 }, - { 4, 6, 12, 14 }, - { 5, 7, 13, 15 }, - { 16, 18, 24, 26 }, - { 17, 19, 25, 27 }, - { 20, 22, 28, 30 }, - { 21, 23, 29, 31 } -}; - -__constant uchar columnTable32[8][8] = -{ - { 0, 1, 4, 5, 8, 9, 12, 13 }, - { 2, 3, 6, 7, 10, 11, 14, 15 }, - { 16, 17, 20, 21, 24, 25, 28, 29 }, - { 18, 19, 22, 23, 26, 27, 30, 31 }, - { 32, 33, 36, 37, 40, 41, 44, 45 }, - { 34, 35, 38, 39, 42, 43, 46, 47 }, - { 48, 49, 52, 53, 56, 57, 60, 61 }, - { 50, 51, 54, 55, 58, 59, 62, 63 }, -}; - -__constant uchar columnTable16[8][16] = -{ - { 0, 2, 8, 10, 16, 18, 24, 26, - 1, 3, 9, 11, 17, 19, 25, 27 }, - { 4, 6, 12, 14, 20, 22, 28, 30, - 5, 7, 13, 15, 21, 23, 29, 31 }, - { 32, 34, 40, 42, 48, 50, 56, 58, - 33, 35, 41, 43, 49, 51, 57, 59 }, - { 36, 38, 44, 46, 52, 54, 60, 62, - 37, 39, 45, 47, 53, 55, 61, 63 }, - { 64, 66, 72, 74, 80, 82, 88, 90, - 65, 67, 73, 75, 81, 83, 89, 91 }, - { 68, 70, 76, 78, 84, 86, 92, 94, - 69, 71, 77, 79, 85, 87, 93, 95 }, - { 96, 98, 104, 106, 112, 114, 120, 122, - 97, 99, 105, 107, 113, 115, 121, 123 }, - { 100, 102, 108, 110, 116, 118, 124, 126, - 101, 103, 109, 111, 117, 119, 125, 127 }, -}; - -__constant uchar columnTable8[16][16] = -{ - { 0, 4, 16, 20, 32, 36, 48, 52, // column 0 - 2, 6, 18, 22, 34, 38, 50, 54 }, - { 8, 12, 24, 28, 40, 44, 56, 60, - 10, 14, 26, 30, 42, 46, 58, 62 }, - { 33, 37, 49, 53, 1, 5, 17, 21, - 35, 39, 51, 55, 3, 7, 19, 23 }, - { 41, 45, 57, 61, 9, 13, 25, 29, - 43, 47, 59, 63, 11, 15, 27, 31 }, - { 96, 100, 112, 116, 64, 68, 80, 84, // column 1 - 98, 102, 114, 
118, 66, 70, 82, 86 }, - { 104, 108, 120, 124, 72, 76, 88, 92, - 106, 110, 122, 126, 74, 78, 90, 94 }, - { 65, 69, 81, 85, 97, 101, 113, 117, - 67, 71, 83, 87, 99, 103, 115, 119 }, - { 73, 77, 89, 93, 105, 109, 121, 125, - 75, 79, 91, 95, 107, 111, 123, 127 }, - { 128, 132, 144, 148, 160, 164, 176, 180, // column 2 - 130, 134, 146, 150, 162, 166, 178, 182 }, - { 136, 140, 152, 156, 168, 172, 184, 188, - 138, 142, 154, 158, 170, 174, 186, 190 }, - { 161, 165, 177, 181, 129, 133, 145, 149, - 163, 167, 179, 183, 131, 135, 147, 151 }, - { 169, 173, 185, 189, 137, 141, 153, 157, - 171, 175, 187, 191, 139, 143, 155, 159 }, - { 224, 228, 240, 244, 192, 196, 208, 212, // column 3 - 226, 230, 242, 246, 194, 198, 210, 214 }, - { 232, 236, 248, 252, 200, 204, 216, 220, - 234, 238, 250, 254, 202, 206, 218, 222 }, - { 193, 197, 209, 213, 225, 229, 241, 245, - 195, 199, 211, 215, 227, 231, 243, 247 }, - { 201, 205, 217, 221, 233, 237, 249, 253, - 203, 207, 219, 223, 235, 239, 251, 255 }, -}; - -__constant ushort columnTable4[16][32] = -{ - { 0, 8, 32, 40, 64, 72, 96, 104, // column 0 - 2, 10, 34, 42, 66, 74, 98, 106, - 4, 12, 36, 44, 68, 76, 100, 108, - 6, 14, 38, 46, 70, 78, 102, 110 }, - { 16, 24, 48, 56, 80, 88, 112, 120, - 18, 26, 50, 58, 82, 90, 114, 122, - 20, 28, 52, 60, 84, 92, 116, 124, - 22, 30, 54, 62, 86, 94, 118, 126 }, - { 65, 73, 97, 105, 1, 9, 33, 41, - 67, 75, 99, 107, 3, 11, 35, 43, - 69, 77, 101, 109, 5, 13, 37, 45, - 71, 79, 103, 111, 7, 15, 39, 47 }, - { 81, 89, 113, 121, 17, 25, 49, 57, - 83, 91, 115, 123, 19, 27, 51, 59, - 85, 93, 117, 125, 21, 29, 53, 61, - 87, 95, 119, 127, 23, 31, 55, 63 }, - { 192, 200, 224, 232, 128, 136, 160, 168, // column 1 - 194, 202, 226, 234, 130, 138, 162, 170, - 196, 204, 228, 236, 132, 140, 164, 172, - 198, 206, 230, 238, 134, 142, 166, 174 }, - { 208, 216, 240, 248, 144, 152, 176, 184, - 210, 218, 242, 250, 146, 154, 178, 186, - 212, 220, 244, 252, 148, 156, 180, 188, - 214, 222, 246, 254, 150, 158, 182, 190 }, - { 129, 137, 
161, 169, 193, 201, 225, 233, - 131, 139, 163, 171, 195, 203, 227, 235, - 133, 141, 165, 173, 197, 205, 229, 237, - 135, 143, 167, 175, 199, 207, 231, 239 }, - { 145, 153, 177, 185, 209, 217, 241, 249, - 147, 155, 179, 187, 211, 219, 243, 251, - 149, 157, 181, 189, 213, 221, 245, 253, - 151, 159, 183, 191, 215, 223, 247, 255 }, - { 256, 264, 288, 296, 320, 328, 352, 360, // column 2 - 258, 266, 290, 298, 322, 330, 354, 362, - 260, 268, 292, 300, 324, 332, 356, 364, - 262, 270, 294, 302, 326, 334, 358, 366 }, - { 272, 280, 304, 312, 336, 344, 368, 376, - 274, 282, 306, 314, 338, 346, 370, 378, - 276, 284, 308, 316, 340, 348, 372, 380, - 278, 286, 310, 318, 342, 350, 374, 382 }, - { 321, 329, 353, 361, 257, 265, 289, 297, - 323, 331, 355, 363, 259, 267, 291, 299, - 325, 333, 357, 365, 261, 269, 293, 301, - 327, 335, 359, 367, 263, 271, 295, 303 }, - { 337, 345, 369, 377, 273, 281, 305, 313, - 339, 347, 371, 379, 275, 283, 307, 315, - 341, 349, 373, 381, 277, 285, 309, 317, - 343, 351, 375, 383, 279, 287, 311, 319 }, - { 448, 456, 480, 488, 384, 392, 416, 424, // column 3 - 450, 458, 482, 490, 386, 394, 418, 426, - 452, 460, 484, 492, 388, 396, 420, 428, - 454, 462, 486, 494, 390, 398, 422, 430 }, - { 464, 472, 496, 504, 400, 408, 432, 440, - 466, 474, 498, 506, 402, 410, 434, 442, - 468, 476, 500, 508, 404, 412, 436, 444, - 470, 478, 502, 510, 406, 414, 438, 446 }, - { 385, 393, 417, 425, 449, 457, 481, 489, - 387, 395, 419, 427, 451, 459, 483, 491, - 389, 397, 421, 429, 453, 461, 485, 493, - 391, 399, 423, 431, 455, 463, 487, 495 }, - { 401, 409, 433, 441, 465, 473, 497, 505, - 403, 411, 435, 443, 467, 475, 499, 507, - 405, 413, 437, 445, 469, 477, 501, 509, - 407, 415, 439, 447, 471, 479, 503, 511 }, -}; - -int BlockNumber32(int x, int y, int bp, int bw) -{ - return bp + mad24(y & ~0x1f, bw, (x >> 1) & ~0x1f) + blockTable32[(y >> 3) & 3][(x >> 3) & 7]; -} - -int BlockNumber16(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 1) & ~0x1f, bw, (x >> 1) & 
~0x1f) + blockTable16[(y >> 3) & 7][(x >> 4) & 3]; -} - -int BlockNumber16S(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 1) & ~0x1f, bw, (x >> 1) & ~0x1f) + blockTable16S[(y >> 3) & 7][(x >> 4) & 3]; -} - -int BlockNumber32Z(int x, int y, int bp, int bw) -{ - return bp + mad24(y & ~0x1f, bw, (x >> 1) & ~0x1f) + blockTable32Z[(y >> 3) & 3][(x >> 3) & 7]; -} - -int BlockNumber16Z(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 1) & ~0x1f, bw, (x >> 1) & ~0x1f) + blockTable16Z[(y >> 3) & 7][(x >> 4) & 3]; -} - -int BlockNumber16SZ(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 1) & ~0x1f, bw, (x >> 1) & ~0x1f) + blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3]; -} - -int BlockNumber8(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 1) & ~0x1f, bw >> 1, (x >> 2) & ~0x1f) + blockTable8[(y >> 4) & 3][(x >> 4) & 7]; -} - -int BlockNumber4(int x, int y, int bp, int bw) -{ - return bp + mad24((y >> 2) & ~0x1f, bw >> 1, (x >> 2) & ~0x1f) + blockTable4[(y >> 4) & 7][(x >> 5) & 3]; -} - -int PixelAddress32(int x, int y, int bp, int bw) -{ - return (BlockNumber32(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7]; -} - -int PixelAddress16(int x, int y, int bp, int bw) -{ - return (BlockNumber16(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; -} - -int PixelAddress16S(int x, int y, int bp, int bw) -{ - return (BlockNumber16S(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; -} - -int PixelAddress32Z(int x, int y, int bp, int bw) -{ - return (BlockNumber32Z(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7]; -} - -int PixelAddress16Z(int x, int y, int bp, int bw) -{ - return (BlockNumber16Z(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; -} - -int PixelAddress16SZ(int x, int y, int bp, int bw) -{ - return (BlockNumber16SZ(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15]; -} - -int PixelAddress8(int x, int y, int bp, int bw) -{ - return (BlockNumber8(x, y, bp, bw) << 8) + columnTable8[y & 15][x & 15]; -} - -int PixelAddress4(int 
x, int y, int bp, int bw) -{ - return (BlockNumber4(x, y, bp, bw) << 9) + columnTable4[y & 15][x & 31]; -} - -int PixelAddress(int x, int y, int bp, int bw, int psm) -{ - switch(psm) - { - default: - case PSM_PSMCT32: - case PSM_PSMCT24: - case PSM_PSMT8H: - case PSM_PSMT4HL: - case PSM_PSMT4HH: - return PixelAddress32(x, y, bp, bw); - case PSM_PSMCT16: - return PixelAddress16(x, y, bp, bw); - case PSM_PSMCT16S: - return PixelAddress16S(x, y, bp, bw); - case PSM_PSMZ32: - case PSM_PSMZ24: - return PixelAddress32Z(x, y, bp, bw); - case PSM_PSMZ16: - return PixelAddress16Z(x, y, bp, bw); - case PSM_PSMZ16S: - return PixelAddress16SZ(x, y, bp, bw); - case PSM_PSMT8: - return PixelAddress8(x, y, bp, bw); - case PSM_PSMT4: - return PixelAddress4(x, y, bp, bw); - } -} - -uint ReadFrame(__global uchar* vm, int addr, int psm) -{ - switch(psm) - { - default: - case PSM_PSMCT32: - case PSM_PSMCT24: - case PSM_PSMZ32: - case PSM_PSMZ24: - return ((__global uint*)vm)[addr]; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - return ((__global ushort*)vm)[addr]; - } -} - -void WriteFrame(__global uchar* vm, int addr, int psm, uint value) -{ - switch(psm) - { - default: - case PSM_PSMCT32: - case PSM_PSMZ32: - case PSM_PSMCT24: - case PSM_PSMZ24: - ((__global uint*)vm)[addr] = value; - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - ((__global ushort*)vm)[addr] = (ushort)value; - break; - } -} - -bool is16bit(int psm) -{ - return psm < 8 && (psm & 3) >= 2; -} - -bool is24bit(int psm) -{ - return psm < 8 && (psm & 3) == 1; -} - -bool is32bit(int psm) -{ - return psm < 8 && (psm & 3) == 0; -} - -#ifdef PRIM - -int GetVertexPerPrim(int prim_class) -{ - switch(prim_class) - { - default: - case GS_POINT_CLASS: return 1; - case GS_LINE_CLASS: return 2; - case GS_TRIANGLE_CLASS: return 3; - case GS_SPRITE_CLASS: return 2; - } -} - -#define VERTEX_PER_PRIM GetVertexPerPrim(PRIM) - -#endif - -#ifdef KERNEL_PRIM - 
-__kernel void KERNEL_PRIM( - __global gs_env* env, - __global uchar* vb_base, - __global uchar* ib_base, - __global uchar* pb_base, - uint vb_start, - uint ib_start, - uint pb_start) -{ - size_t prim_index = get_global_id(0); - - __global gs_vertex* vb = (__global gs_vertex*)(vb_base + vb_start); - __global uint* ib = (__global uint*)(ib_base + ib_start); - __global gs_prim* prim = &env->prim[prim_index]; - - ib += prim_index * VERTEX_PER_PRIM; - - uint pb_index = ib[0] >> 24; - - prim->pb_index = pb_index; - - __global gs_param* pb = (__global gs_param*)(pb_base + pb_start + pb_index * TFX_PARAM_SIZE); - - __global gs_vertex* v0 = &vb[ib[0] & 0x00ffffff]; - __global gs_vertex* v1 = &vb[ib[1] & 0x00ffffff]; - __global gs_vertex* v2 = &vb[ib[2] & 0x00ffffff]; - - int2 pmin, pmax; - - if(PRIM == GS_POINT_CLASS) - { - pmin = pmax = convert_int2_rte(v0->p.xy); - - prim->v[0].p = v0->p; - prim->v[0].tc = v0->tc; - } - else if(PRIM == GS_LINE_CLASS) - { - int2 p0 = convert_int2_rte(v0->p.xy); - int2 p1 = convert_int2_rte(v1->p.xy); - - pmin = min(p0, p1); - pmax = max(p0, p1); - } - else if(PRIM == GS_TRIANGLE_CLASS) - { - int2 p0 = convert_int2_rtp(v0->p.xy); - int2 p1 = convert_int2_rtp(v1->p.xy); - int2 p2 = convert_int2_rtp(v2->p.xy); - - pmin = min(min(p0, p1), p2); - pmax = max(max(p0, p1), p2); - - // z needs special care, since it's a 32 bit unit, float cannot encode it exactly - // only interpolate the relative to zmin and hopefully small values - - uint zmin = min(min(v0->z, v1->z), v2->z); - uint zmax = max(max(v0->z, v1->z), v2->z); - - prim->v[0].p = (float4)(v0->p.x, v0->p.y, as_float(v0->z - zmin), v0->p.w); - prim->v[0].tc = v0->tc; - prim->v[1].p = (float4)(v1->p.x, v1->p.y, as_float(v1->z - zmin), v1->p.w); - prim->v[1].tc = v1->tc; - prim->v[2].p = (float4)(v2->p.x, v2->p.y, as_float(v2->z - zmin), v2->p.w); - prim->v[2].tc = v2->tc; - - prim->zmin = zmin; - prim->zmax = zmax; - - float4 dp0 = v1->p - v0->p; - float4 dp1 = v0->p - v2->p; - float4 dp2 
= v2->p - v1->p; - - float cp = dp0.x * dp1.y - dp0.y * dp1.x; - - if(cp != 0.0f) - { - cp = native_recip(cp); - - float2 u = dp0.xy * cp; - float2 v = -dp1.xy * cp; - - // v0 has the (0, 0, 1) barycentric coord, v1: (0, 1, 0), v2: (1, 0, 0) - - gs_barycentric b; - - b.dx = (float4)(-v.y, u.y, v.y - u.y, v0->p.x); - b.dy = (float4)(v.x, -u.x, u.x - v.x, v0->p.y); - - dp0.xy = dp0.xy * sign(cp); - dp1.xy = dp1.xy * sign(cp); - dp2.xy = dp2.xy * sign(cp); - - b.zero.x = select(0.0f, CL_FLT_EPSILON, (dp1.y < 0) | ((dp1.y == 0) & (dp1.x > 0))); - b.zero.y = select(0.0f, CL_FLT_EPSILON, (dp0.y < 0) | ((dp0.y == 0) & (dp0.x > 0))); - b.zero.z = select(0.0f, CL_FLT_EPSILON, (dp2.y < 0) | ((dp2.y == 0) & (dp2.x > 0))); - - // any barycentric(reject_corner) < 0, tile outside the triangle - - b.reject_corner.x = 0.0f + max(max(max(b.dx.x + b.dy.x, b.dx.x), b.dy.x), 0.0f) * BIN_SIZE; - b.reject_corner.y = 0.0f + max(max(max(b.dx.y + b.dy.y, b.dx.y), b.dy.y), 0.0f) * BIN_SIZE; - b.reject_corner.z = 1.0f + max(max(max(b.dx.z + b.dy.z, b.dx.z), b.dy.z), 0.0f) * BIN_SIZE; - - // TODO: accept_corner, at min value, all barycentric(accept_corner) >= 0, tile fully inside, no per pixel hittest needed - - env->barycentric[prim_index] = b; - } - else // triangle has zero area - { - pmax = -1; // won't get included in any tile - } - } - else if(PRIM == GS_SPRITE_CLASS) - { - int2 p0 = convert_int2_rtp(v0->p.xy); - int2 p1 = convert_int2_rtp(v1->p.xy); - - pmin = min(p0, p1); - pmax = max(p0, p1); - - int4 mask = (int4)(v0->p.xy > v1->p.xy, 0, 0); - - prim->v[0].p = select(v0->p, v1->p, mask); // pmin - prim->v[0].tc = select(v0->tc, v1->tc, mask); - prim->v[1].p = select(v1->p, v0->p, mask); // pmax - prim->v[1].tc = select(v1->tc, v0->tc, mask); - prim->v[1].tc.xy = (prim->v[1].tc.xy - prim->v[0].tc.xy) / (prim->v[1].p.xy - prim->v[0].p.xy); - } - - int4 scissor = pb->scissor; - - pmin = select(pmin, scissor.xy, pmin < scissor.xy); - pmax = select(pmax, scissor.zw, pmax > scissor.zw); - 
- int4 r = (int4)(pmin, pmax + (int2)(BIN_SIZE - 1)) >> BIN_SIZE_BITS; - - env->bbox[prim_index] = convert_uchar4_sat(r); -} - -#endif - -#ifdef KERNEL_TILE - -int tile_in_triangle(float2 p, gs_barycentric b) -{ - float3 f = b.dx.xyz * (p.x - b.dx.w) + b.dy.xyz * (p.y - b.dy.w) + b.reject_corner.xyz; - - f = select(f, (float3)(0.0f), fabs(f) < (float3)(CL_FLT_EPSILON * 10)); - - return all(f >= b.zero.xyz); -} - -#if CLEAR == 1 - -__kernel void KERNEL_TILE(__global gs_env* env) -{ - env->bounds[get_global_id(0)].first = -1; - env->bounds[get_global_id(0)].last = 0; -} - -#elif MODE < 3 - -#if MAX_PRIM_PER_BATCH != 32 - #error "MAX_PRIM_PER_BATCH != 32" -#endif - -#define MAX_PRIM_PER_GROUP (32u >> MODE) - -__kernel void KERNEL_TILE( - __global gs_env* env, - uint prim_count, - uint bin_count, // == bin_dim.z * bin_dim.w - uchar4 bin_dim) -{ - uint batch_index = get_group_id(2) >> MODE; - uint prim_start = get_group_id(2) << (5 - MODE); - uint group_prim_index = get_local_id(2); - uint bin_index = get_local_id(1) * get_local_size(0) + get_local_id(0); - - __global BIN_TYPE* bin = &env->bin[batch_index * bin_count]; - __global uchar4* bbox = &env->bbox[prim_start]; - __global gs_barycentric* barycentric = &env->barycentric[prim_start]; - - __local uchar4 bbox_cache[MAX_PRIM_PER_GROUP]; - __local gs_barycentric barycentric_cache[MAX_PRIM_PER_GROUP]; - __local uint visible[8 << MODE]; - - if(get_local_id(2) == 0) - { - visible[bin_index] = 0; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - uint group_prim_count = min(prim_count - prim_start, MAX_PRIM_PER_GROUP); - - event_t e = async_work_group_copy(bbox_cache, bbox, group_prim_count, 0); - - wait_group_events(1, &e); - - if(PRIM == GS_TRIANGLE_CLASS) - { - e = async_work_group_copy((__local float4*)barycentric_cache, (__global float4*)barycentric, group_prim_count * (sizeof(gs_barycentric) / sizeof(float4)), 0); - - wait_group_events(1, &e); - } - - if(group_prim_index < group_prim_count) - { - int x = bin_dim.x + 
get_local_id(0); - int y = bin_dim.y + get_local_id(1); - - uchar4 r = bbox_cache[group_prim_index]; - - uint test = (r.x <= x) & (r.z > x) & (r.y <= y) & (r.w > y); - - if(PRIM == GS_TRIANGLE_CLASS && test != 0) - { - test = tile_in_triangle(convert_float2((int2)(x, y) << BIN_SIZE_BITS), barycentric_cache[group_prim_index]); - } - - atomic_or(&visible[bin_index], test << ((MAX_PRIM_PER_GROUP - 1) - get_local_id(2))); - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if(get_local_id(2) == 0) - { - #if MODE == 0 - ((__global uint*)&bin[bin_index])[0] = visible[bin_index]; - #elif MODE == 1 - ((__global ushort*)&bin[bin_index])[1 - (get_group_id(2) & 1)] = visible[bin_index]; - #elif MODE == 2 - ((__global uchar*)&bin[bin_index])[3 - (get_group_id(2) & 3)] = visible[bin_index]; - #endif - - if(visible[bin_index] != 0) - { - atomic_min(&env->bounds[bin_index].first, batch_index); - atomic_max(&env->bounds[bin_index].last, batch_index); - } - } -} - -#elif MODE == 3 - -__kernel void KERNEL_TILE( - __global gs_env* env, - uint prim_count, - uint bin_count, // == bin_dim.z * bin_dim.w - uchar4 bin_dim) -{ - size_t batch_index = get_group_id(0); - size_t local_id = get_local_id(0); - size_t local_size = get_local_size(0); - - uint batch_prim_count = min(prim_count - (batch_index << MAX_PRIM_PER_BATCH_BITS), MAX_PRIM_PER_BATCH); - - __global BIN_TYPE* bin = &env->bin[batch_index * bin_count]; - __global uchar4* bbox = &env->bbox[batch_index << MAX_PRIM_PER_BATCH_BITS]; - __global gs_barycentric* barycentric = &env->barycentric[batch_index << MAX_PRIM_PER_BATCH_BITS]; - - __local uchar4 bbox_cache[MAX_PRIM_PER_BATCH]; - __local gs_barycentric barycentric_cache[MAX_PRIM_PER_BATCH]; - - event_t e = async_work_group_copy(bbox_cache, bbox, batch_prim_count, 0); - - wait_group_events(1, &e); - - if(PRIM == GS_TRIANGLE_CLASS) - { - e = async_work_group_copy((__local float4*)barycentric_cache, (__global float4*)barycentric, batch_prim_count * (sizeof(gs_barycentric) / sizeof(float4)), 0); 
- - wait_group_events(1, &e); - } - - for(uint bin_index = local_id; bin_index < bin_count; bin_index += local_size) - { - int y = bin_index / bin_dim.z; // TODO: very expensive, no integer divider on current hardware - int x = bin_index - y * bin_dim.z; - - x += bin_dim.x; - y += bin_dim.y; - - BIN_TYPE visible = 0; - - for(uint i = 0; i < batch_prim_count; i++) - { - uchar4 r = bbox_cache[i]; - - BIN_TYPE test = (r.x <= x) & (r.z > x) & (r.y <= y) & (r.w > y); - - if(PRIM == GS_TRIANGLE_CLASS && test != 0) - { - test = tile_in_triangle(convert_float2((int2)(x, y) << BIN_SIZE_BITS), barycentric_cache[i]); - } - - visible |= test << ((MAX_PRIM_PER_BATCH - 1) - i); - } - - bin[bin_index] = visible; - - if(visible != 0) - { - atomic_min(&env->bounds[bin_index].first, batch_index); - atomic_max(&env->bounds[bin_index].last, batch_index); - } - } -} - -#endif - -#endif - -#ifdef KERNEL_TFX - -bool ZTest(uint zs, uint zd) -{ - if(ZTEST) - { - if(is24bit(ZPSM)) zd &= 0x00ffffff; - - switch(ZTST) - { - case ZTST_NEVER: - return false; - case ZTST_ALWAYS: - return true; - case ZTST_GEQUAL: - return zs >= zd; - case ZTST_GREATER: - return zs > zd; - } - } - - return true; -} - -bool AlphaTest(int alpha, int aref, uint* fm, uint* zm) -{ - switch(AFAIL) - { - case AFAIL_KEEP: - break; - case AFAIL_FB_ONLY: - if(!ZWRITE) return true; - break; - case AFAIL_ZB_ONLY: - if(!FWRITE) return true; - break; - case AFAIL_RGB_ONLY: - if(!ZWRITE && is24bit(FPSM)) return true; - break; - } - - uint pass; - - switch(ATST) - { - case ATST_NEVER: - pass = false; - break; - case ATST_ALWAYS: - return true; - case ATST_LESS: - pass = alpha < aref; - break; - case ATST_LEQUAL: - pass = alpha <= aref; - break; - case ATST_EQUAL: - pass = alpha == aref; - break; - case ATST_GEQUAL: - pass = alpha >= aref; - break; - case ATST_GREATER: - pass = alpha > aref; - break; - case ATST_NOTEQUAL: - pass = alpha != aref; - break; - } - - switch(AFAIL) - { - case AFAIL_KEEP: - return pass; - case 
AFAIL_FB_ONLY: - *zm |= pass ? 0 : 0xffffffff; - break; - case AFAIL_ZB_ONLY: - *fm |= pass ? 0 : 0xffffffff; - break; - case AFAIL_RGB_ONLY: - if(is32bit(FPSM)) *fm |= pass ? 0 : 0xff000000; - if(is16bit(FPSM)) *fm |= pass ? 0 : 0xffff8000; - *zm |= pass ? 0 : 0xffffffff; - break; - } - - return true; -} - -bool DestAlphaTest(uint fd) -{ - if(DATE) - { - if(DATM) - { - if(is32bit(FPSM)) return (fd & 0x80000000) != 0; - if(is16bit(FPSM)) return (fd & 0x00008000) != 0; - } - else - { - if(is32bit(FPSM)) return (fd & 0x80000000) == 0; - if(is16bit(FPSM)) return (fd & 0x00008000) == 0; - } - } - - return true; -} - -int Wrap(int a, int b, int c, int mode) -{ - if(MERGED) - { - return select((a & b) | c, clamp(a, b, c), (mode & 2) != 0); - } - else - { - switch(mode) - { - case CLAMP_REGION_REPEAT: - return (a & b) | c; - case CLAMP_REPEAT: - return a & b; - case CLAMP_CLAMP: - return clamp(a, 0, c); - case CLAMP_REGION_CLAMP: - return clamp(a, b, c); - } - } -} - -int4 AlphaBlend(int4 c, uint fd, int afix, uint2 sel) -{ - if(FWRITE && (ABE || AA1)) - { - int4 cs = c; - int4 cd; - - if(ABA != ABB && (ABA == 1 || ABB == 1 || ABC == 1) || ABD == 1 || MERGED) - { - if(is32bit(FPSM) || is24bit(FPSM)) - { - cd.x = fd & 0xff; - cd.y = (fd >> 8) & 0xff; - cd.z = (fd >> 16) & 0xff; - cd.w = fd >> 24; - } - else if(is16bit(FPSM)) - { - cd.x = (fd << 3) & 0xf8; - cd.y = (fd >> 2) & 0xf8; - cd.z = (fd >> 7) & 0xf8; - cd.w = (fd >> 8) & 0x80; - } - } - - if(MERGED) - { - int aba = TFX_ABA(sel); - int abb = TFX_ABB(sel); - int abc = TFX_ABC(sel); - int abd = TFX_ABD(sel); - - int ad = !is24bit(FPSM) ? cd.w : 0x80; - - int3 A = aba == 0 ? cs.xyz : aba == 1 ? cd.xyz : 0; - int3 B = abb == 0 ? cs.xyz : abb == 1 ? cd.xyz : 0; - int C = abc == 0 ? cs.w : abc == 1 ? ad : afix; - int3 D = abd == 0 ? cs.xyz : abd == 1 ? 
cd.xyz : 0; - - c.xyz = (mul24(A - B, C) >> 7) + D; - } - else - { - if(ABA != ABB) - { - switch(ABA) - { - case 0: break; // c.xyz = cs.xyz; - case 1: c.xyz = cd.xyz; break; - case 2: c.xyz = 0; break; - } - - switch(ABB) - { - case 0: c.xyz -= cs.xyz; break; - case 1: c.xyz -= cd.xyz; break; - case 2: break; - } - - if(!(is24bit(FPSM) && ABC == 1)) - { - int a = 0; - - switch(ABC) - { - case 0: a = cs.w; break; - case 1: a = cd.w; break; - case 2: a = afix; break; - } - - c.xyz = c.xyz * a >> 7; - } - - switch(ABD) - { - case 0: c.xyz += cs.xyz; break; - case 1: c.xyz += cd.xyz; break; - case 2: break; - } - } - else - { - switch(ABD) - { - case 0: break; - case 1: c.xyz = cd.xyz; break; - case 2: c.xyz = 0; break; - } - } - } - - if(PABE) - { - c.xyz = select(cs.xyz, c.xyz, (int3)(cs.w << 24)); - } - } - - return c; -} - -uchar4 Expand24To32(uint rgba, uchar ta0) -{ - uchar4 c; - - c.x = rgba & 0xff; - c.y = (rgba >> 8) & 0xff; - c.z = (rgba >> 16) & 0xff; - c.w = !AEM || (rgba & 0xffffff) != 0 ? ta0 : 0; - - return c; -} - -uchar4 Expand16To32(ushort rgba, uchar ta0, uchar ta1) -{ - uchar4 c; - - c.x = (rgba << 3) & 0xf8; - c.y = (rgba >> 2) & 0xf8; - c.z = (rgba >> 7) & 0xf8; - c.w = !AEM || (rgba & 0x7fff) != 0 ? ((rgba & 0x8000) ? 
ta1 : ta0) : 0; - - return c; -} - -int4 ReadTexel(__global uchar* vm, int x, int y, int level, __global gs_param* pb) -{ - uchar4 c; - - uint addr = PixelAddress(x, y, pb->tbp[level], pb->tbw[level], TPSM); - - __global ushort* vm16 = (__global ushort*)vm; - __global uint* vm32 = (__global uint*)vm; - - switch(TPSM) - { - default: - case PSM_PSMCT32: - case PSM_PSMZ32: - c = ((__global uchar4*)vm)[addr]; - break; - case PSM_PSMCT24: - case PSM_PSMZ24: - c = Expand24To32(vm32[addr], pb->ta0); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - c = Expand16To32(vm16[addr], pb->ta0, pb->ta1); - break; - case PSM_PSMT8: - c = pb->clut[vm[addr]]; - break; - case PSM_PSMT4: - c = pb->clut[(vm[addr >> 1] >> ((addr & 1) << 2)) & 0x0f]; - break; - case PSM_PSMT8H: - c = pb->clut[vm32[addr] >> 24]; - break; - case PSM_PSMT4HL: - c = pb->clut[(vm32[addr] >> 24) & 0x0f]; - break; - case PSM_PSMT4HH: - c = pb->clut[(vm32[addr] >> 28) & 0x0f]; - break; - } - - //printf("[%d %d] %05x %d %d %08x | %v4hhd | %08x\n", x, y, pb->tbp[level], pb->tbw[level], TPSM, addr, c, vm[addr]); - - return convert_int4(c); -} - -int4 SampleTexture(__global uchar* tex, __global gs_param* pb, float3 t) -{ - int4 c; - - if(0)//if(MMIN) - { - // TODO - } - else - { - int2 uv; - - if(!FST) - { - uv = convert_int2_rte(t.xy * native_recip(t.z)); - } - else - { - // sfex capcom logo third drawing call at (0,223) calculated as: - // t0 + (p - p0) * (t - t0) / (p1 - p0) - // 0.5 + (223 - 0) * (112.5 - 0.5) / (224 - 0) = 112 - // due to rounding errors (multiply-add instruction maybe): - // t.y = 111.999..., uv0.y = 111, uvf.y = 15/16, off by 1/16 texel vertically after interpolation - // TODO: sw renderer samples at 112 exactly, check which one is correct - - // last line error in persona 3 movie clips if rounding is enabled - - uv = convert_int2(t.xy); - } - - if(LTF) uv -= 0x0008; - - int2 uvf = uv & 0x000f; - - int2 uv0 = uv >> 4; - int2 uv1 = uv0 + 1; - - uv0.x = 
Wrap(uv0.x, pb->minu, pb->maxu, MERGED ? TFX_WMS(pb->sel) : WMS); - uv0.y = Wrap(uv0.y, pb->minv, pb->maxv, MERGED ? TFX_WMT(pb->sel) : WMT); - uv1.x = Wrap(uv1.x, pb->minu, pb->maxu, MERGED ? TFX_WMS(pb->sel) : WMS); - uv1.y = Wrap(uv1.y, pb->minv, pb->maxv, MERGED ? TFX_WMT(pb->sel) : WMT); - - int4 c00 = ReadTexel(tex, uv0.x, uv0.y, 0, pb); - int4 c01 = ReadTexel(tex, uv1.x, uv0.y, 0, pb); - int4 c10 = ReadTexel(tex, uv0.x, uv1.y, 0, pb); - int4 c11 = ReadTexel(tex, uv1.x, uv1.y, 0, pb); - - if(LTF) - { - c00 = (mul24(c01 - c00, uvf.x) >> 4) + c00; - c10 = (mul24(c11 - c10, uvf.x) >> 4) + c10; - c00 = (mul24(c10 - c00, uvf.y) >> 4) + c00; - } - - c = c00; - } - - return c; -} - -// TODO: 2x2 MSAA idea -// downsize the rendering tile to 16x8 or 8x8 and render 2x2 sub-pixels to __local -// hittest and ztest 2x2 (create write mask, only skip if all -1) -// calculate color 1x1, alpha tests 1x1 -// use mask to filter failed sub-pixels when writing to __local -// needs the tile data to be fetched at the beginning, even if rfb/zfb is not set, unless we know the tile is fully covered -// multiple work-items may render different prims to the same 2x2 sub-pixel, averaging can only be done after a barrier at the very end -// pb->fm? alpha channel and following alpha tests? 
some games may depend on exact results, not some average - -__kernel __attribute__((reqd_work_group_size(8, 8, 1))) void KERNEL_TFX( - __global gs_env* env, - __global uchar* vm, - __global uchar* tex, - __global uchar* pb_base, - uint pb_start, - uint prim_start, - uint prim_count, - uint bin_count, // == bin_dim.z * bin_dim.w - uchar4 bin_dim, - uint fbp, - uint zbp, - uint bw) -{ - uint x = get_global_id(0); - uint y = get_global_id(1); - - uint bin_x = (x >> BIN_SIZE_BITS) - bin_dim.x; - uint bin_y = (y >> BIN_SIZE_BITS) - bin_dim.y; - uint bin_index = mad24(bin_y, (uint)bin_dim.z, bin_x); - - uint batch_first = env->bounds[bin_index].first; - uint batch_last = env->bounds[bin_index].last; - uint batch_start = prim_start >> MAX_PRIM_PER_BATCH_BITS; - - if(batch_last < batch_first) - { - return; - } - - uint skip; - - if(batch_start < batch_first) - { - uint n = (batch_first - batch_start) * MAX_PRIM_PER_BATCH - (prim_start & (MAX_PRIM_PER_BATCH - 1)); - - if(n > prim_count) - { - return; - } - - skip = 0; - prim_count -= n; - batch_start = batch_first; - } - else - { - skip = prim_start & (MAX_PRIM_PER_BATCH - 1); - prim_count += skip; - } - - if(batch_start > batch_last) - { - return; - } - - prim_count = min(prim_count, (batch_last - batch_start + 1) << MAX_PRIM_PER_BATCH_BITS); - - // - - int2 pi = (int2)(x, y); - float2 pf = convert_float2(pi); - - int faddr = PixelAddress(x, y, fbp, bw, FPSM); - int zaddr = PixelAddress(x, y, zbp, bw, ZPSM); - - uint fd, zd; // TODO: fd as int4 and only pack before writing out? 
- - if(RFB) - { - fd = ReadFrame(vm, faddr, FPSM); - } - - if(RZB) - { - zd = ReadFrame(vm, zaddr, ZPSM); - } - - // early destination alpha test - - if(!DestAlphaTest(fd)) - { - return; - } - - // - - uint fragments = 0; - - __global BIN_TYPE* bin = &env->bin[bin_index + batch_start * bin_count]; // TODO: not needed for "one tile case" - __global gs_prim* prim_base = &env->prim[batch_start << MAX_PRIM_PER_BATCH_BITS]; - __global gs_barycentric* barycentric = &env->barycentric[batch_start << MAX_PRIM_PER_BATCH_BITS]; - - pb_base += pb_start; - - BIN_TYPE bin_value = *bin & ((BIN_TYPE)-1 >> skip); - - for(uint prim_index = 0; prim_index < prim_count; prim_index += MAX_PRIM_PER_BATCH) - { - while(bin_value != 0) - { - uint i = clz(bin_value); - - if(prim_index + i >= prim_count) - { - break; - } - - bin_value ^= (BIN_TYPE)1 << ((MAX_PRIM_PER_BATCH - 1) - i); // bin_value &= (ulong)-1 >> (i + 1); - - __global gs_prim* prim = &prim_base[prim_index + i]; - __global gs_param* pb = (__global gs_param*)(pb_base + prim->pb_index * TFX_PARAM_SIZE); - - if(!NOSCISSOR) - { - if(!all((pi >= pb->scissor.xy) & (pi < pb->scissor.zw))) - { - continue; - } - } - - uint2 zf; - float3 t; - int4 c; - - // TODO: do not hittest if we know the tile is fully inside the prim - - if(PRIM == GS_POINT_CLASS) - { - float2 dpf = pf - prim->v[0].p.xy; - - if(!all((dpf <= 0.5f) & (dpf > -0.5f))) - { - continue; - } - - zf = as_uint2(prim->v[0].p.zw); - t = prim->v[0].tc.xyz; - c = convert_int4(prim->v[0].c); - } - else if(PRIM == GS_LINE_CLASS) - { - // TODO: find point on line prependicular to (x,y), distance.x < 0.5f || distance.y < 0.5f - // TODO: aa1: coverage ~ distance.x/y, slope selects x or y, zwrite disabled - // TODO: do not draw last pixel of the line - - continue; - } - else if(PRIM == GS_TRIANGLE_CLASS) - { - // TODO: aa1: draw edge as a line - - if(!ZTest(prim->zmax, zd)) - { - continue; - } - - __global gs_barycentric* b = &barycentric[prim_index + i]; - - float3 f = b->dx.xyz * 
(pf.x - b->dx.w) + b->dy.xyz * (pf.y - b->dy.w) + (float3)(0, 0, 1); - - if(!all(select(f, (float3)(0.0f), fabs(f) < (float3)(CL_FLT_EPSILON * 10)) >= b->zero.xyz)) - { - continue; - } - - float2 zf0 = convert_float2(as_uint2(prim->v[0].p.zw)); - float2 zf1 = convert_float2(as_uint2(prim->v[1].p.zw)); - float2 zf2 = convert_float2(as_uint2(prim->v[2].p.zw)); - - zf.x = convert_uint_rte(zf0.x * f.z + zf1.x * f.x + zf2.x * f.y) + prim->zmin; - zf.y = convert_uint_rte(zf0.y * f.z + zf1.y * f.x + zf2.y * f.y); - - t = prim->v[0].tc.xyz * f.z + prim->v[1].tc.xyz * f.x + prim->v[2].tc.xyz * f.y; - - if(IIP) - { - float4 c0 = convert_float4(prim->v[0].c); - float4 c1 = convert_float4(prim->v[1].c); - float4 c2 = convert_float4(prim->v[2].c); - - c = convert_int4_rte(c0 * f.z + c1 * f.x + c2 * f.y); - } - else - { - c = convert_int4(prim->v[2].c); - } - } - else if(PRIM == GS_SPRITE_CLASS) - { - int2 tl = convert_int2_rtp(prim->v[0].p.xy); - int2 br = convert_int2_rtp(prim->v[1].p.xy); - - if(!all((pi >= tl) & (pi < br))) - { - continue; - } - - zf = as_uint2(prim->v[1].p.zw); - - t.xy = prim->v[0].tc.xy + prim->v[1].tc.xy * (pf - prim->v[0].p.xy); - t.z = prim->v[0].tc.z; - - c = convert_int4(prim->v[1].c); - } - - // z test - - uint zs = zf.x; - - if(!ZTest(zs, zd)) - { - continue; - } - - // sample texture - - int4 ct; - - if(TFX != TFX_NONE) - { - ct = SampleTexture(tex, pb, t); - } - - // alpha tfx - - int alpha = c.w; - - if(FB) - { - if(TCC) - { - switch(TFX) - { - case TFX_MODULATE: - c.w = clamp(mul24(ct.w, c.w) >> 7, 0, 0xff); - break; - case TFX_DECAL: - c.w = ct.w; - break; - case TFX_HIGHLIGHT: - c.w = clamp(ct.w + c.w, 0, 0xff); - break; - case TFX_HIGHLIGHT2: - c.w = ct.w; - break; - } - } - - if(AA1) - { - if(!ABE || c.w == 0x80) - { - c.w = 0x80; // TODO: edge ? 
coverage : 0x80 - } - } - } - - // read mask - - uint fm = pb->fm; - uint zm = pb->zm; - - // alpha test - - if(!AlphaTest(c.w, pb->aref, &fm, &zm)) - { - continue; - } - - // all tests done, we have a new output - - fragments++; - - // write z - - if(ZWRITE) - { - zd = RZB ? bitselect(zs, zd, zm) : zs; - } - - // rgb tfx - - if(FWRITE) - { - switch(TFX) - { - case TFX_MODULATE: - c.xyz = clamp(mul24(ct.xyz, c.xyz) >> 7, 0, 0xff); - break; - case TFX_DECAL: - c.xyz = ct.xyz; - break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - c.xyz = clamp((mul24(ct.xyz, c.xyz) >> 7) + alpha, 0, 0xff); - break; - } - } - - // fog - - if(FWRITE && FGE) - { - int fog = (int)zf.y; - - int3 fv = mul24(c.xyz, fog) >> 8; - int3 fc = mul24(convert_int4(pb->fog).xyz, 0xff - fog) >> 8; - - c.xyz = fv + fc; - } - - // alpha blend - - c = AlphaBlend(c, fd, pb->afix, pb->sel); - - // write frame - - if(FWRITE) - { - if(DTHE && is16bit(FPSM)) - { - c.xyz += pb->dimx[y & 3][x & 3]; - } - - c = COLCLAMP ? clamp(c, 0, 0xff) : c & 0xff; - - if(FBA && !is24bit(FPSM)) - { - c.w |= 0x80; - } - - uint fs; - - if(is32bit(FPSM)) - { - fs = (c.w << 24) | (c.z << 16) | (c.y << 8) | c.x; - } - else if(is24bit(FPSM)) - { - fs = (c.z << 16) | (c.y << 8) | c.x; - } - else if(is16bit(FPSM)) - { - fs = ((c.w & 0x80) << 8) | ((c.z & 0xf8) << 7) | ((c.y & 0xf8) << 2) | (c.x >> 3); - } - - fd = RFB ? 
bitselect(fs, fd, fm) : fs; - - // dest alpha test for the next loop - - if(!DestAlphaTest(fd)) - { - prim_index = prim_count; // game over - - break; - } - } - } - - bin += bin_count; - bin_value = *bin; - } - - if(fragments > 0) - { - if(ZWRITE) - { - WriteFrame(vm, zaddr, ZPSM, zd); - } - - if(FWRITE) - { - WriteFrame(vm, faddr, FPSM, fd); - } - } -} - -#endif - -#endif diff --git a/plugins/GSdx_legacy/res/tfx.fx b/plugins/GSdx_legacy/res/tfx.fx deleted file mode 100644 index 518e884a6e..0000000000 --- a/plugins/GSdx_legacy/res/tfx.fx +++ /dev/null @@ -1,834 +0,0 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency -#define FMT_32 0 -#define FMT_24 1 -#define FMT_16 2 -#define FMT_PAL 4 /* flag bit */ - -// And I say this as an ATI user. -#define ATI_SUCKS 1 - -#if SHADER_MODEL >= 0x400 - -#ifndef VS_BPPZ -#define VS_BPPZ 0 -#define VS_TME 1 -#define VS_FST 1 -#endif - -#ifndef GS_IIP -#define GS_IIP 0 -#define GS_PRIM 3 -#endif - -#ifndef PS_FST -#define PS_FST 0 -#define PS_WMS 0 -#define PS_WMT 0 -#define PS_FMT FMT_32 -#define PS_AEM 0 -#define PS_TFX 0 -#define PS_TCC 1 -#define PS_ATST 1 -#define PS_FOG 0 -#define PS_CLR1 0 -#define PS_FBA 0 -#define PS_AOUT 0 -#define PS_LTF 1 -#define PS_COLCLIP 0 -#define PS_DATE 0 -#define PS_SPRITEHACK 0 -#define PS_TCOFFSETHACK 0 -#define PS_POINT_SAMPLER 0 -#define PS_SHUFFLE 0 -#define PS_READ_BA 0 -#endif - -struct VS_INPUT -{ - float2 st : TEXCOORD0; - float4 c : COLOR0; - float q : TEXCOORD1; - uint2 p : POSITION0; - uint z : POSITION1; - uint2 uv : TEXCOORD2; - float4 f : COLOR1; -}; - -struct VS_OUTPUT -{ - float4 p : SV_Position; - float4 t : TEXCOORD0; -#if VS_RTCOPY - float4 tp : TEXCOORD1; -#endif - float4 c : COLOR0; -}; - -struct PS_INPUT -{ - float4 p : SV_Position; - float4 t : TEXCOORD0; -#if PS_DATE > 0 - float4 tp : TEXCOORD1; -#endif - float4 c : COLOR0; -}; - -struct PS_OUTPUT -{ - float4 c0 : SV_Target0; - float4 c1 : SV_Target1; -}; - -Texture2D Texture : 
register(t0); -Texture2D Palette : register(t1); -Texture2D RTCopy : register(t2); -SamplerState TextureSampler : register(s0); -SamplerState PaletteSampler : register(s1); -SamplerState RTCopySampler : register(s2); - -cbuffer cb0 -{ - float4 VertexScale; - float4 VertexOffset; - float2 TextureScale; -}; - -cbuffer cb1 -{ - float3 FogColor; - float AREF; - float4 HalfTexel; - float4 WH; - float4 MinMax; - float2 MinF; - float2 TA; - uint4 MskFix; - float4 TC_OffsetHack; -}; - -float4 sample_c(float2 uv) -{ - if (ATI_SUCKS && PS_POINT_SAMPLER) - { - // Weird issue with ATI cards (happens on at least HD 4xxx and 5xxx), - // it looks like they add 127/128 of a texel to sampling coordinates - // occasionally causing point sampling to erroneously round up. - // I'm manually adjusting coordinates to the centre of texels here, - // though the centre is just paranoia, the top left corner works fine. - uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw; - } - return Texture.Sample(TextureSampler, uv); -} - -float4 sample_p(float u) -{ - return Palette.Sample(PaletteSampler, u); -} - -float4 sample_rt(float2 uv) -{ - return RTCopy.Sample(RTCopySampler, uv); -} - -#elif SHADER_MODEL <= 0x300 - -#ifndef VS_BPPZ -#define VS_BPPZ 0 -#define VS_TME 1 -#define VS_FST 1 -#define VS_LOGZ 1 -#endif - -#ifndef PS_FST -#define PS_FST 0 -#define PS_WMS 0 -#define PS_WMT 0 -#define PS_FMT FMT_32 -#define PS_AEM 0 -#define PS_TFX 0 -#define PS_TCC 0 -#define PS_ATST 4 -#define PS_FOG 0 -#define PS_CLR1 0 -#define PS_RT 0 -#define PS_LTF 0 -#define PS_COLCLIP 0 -#define PS_DATE 0 -#endif - -struct VS_INPUT -{ - float4 p : POSITION0; - float2 t : TEXCOORD0; - float4 c : COLOR0; - float4 f : COLOR1; -}; - -struct VS_OUTPUT -{ - float4 p : POSITION; - float4 t : TEXCOORD0; -#if VS_RTCOPY - float4 tp : TEXCOORD1; -#endif - float4 c : COLOR0; -}; - -struct PS_INPUT -{ - float4 t : TEXCOORD0; -#if PS_DATE > 0 - float4 tp : TEXCOORD1; -#endif - float4 c : COLOR0; -}; - -sampler Texture : 
register(s0); -sampler Palette : register(s1); -sampler RTCopy : register(s2); -sampler1D UMSKFIX : register(s3); -sampler1D VMSKFIX : register(s4); - -float4 vs_params[3]; - -#define VertexScale vs_params[0] -#define VertexOffset vs_params[1] -#define TextureScale vs_params[2].xy - -float4 ps_params[7]; - -#define FogColor ps_params[0].bgr -#define AREF ps_params[0].a -#define HalfTexel ps_params[1] -#define WH ps_params[2] -#define MinMax ps_params[3] -#define MinF ps_params[4].xy -#define TA ps_params[4].zw - -#define TC_OffsetHack ps_params[6] - -float4 sample_c(float2 uv) -{ - return tex2D(Texture, uv); -} - -float4 sample_p(float u) -{ - return tex2D(Palette, u); -} - -float4 sample_rt(float2 uv) -{ - return tex2D(RTCopy, uv); -} - -#endif - -float4 wrapuv(float4 uv) -{ - if(PS_WMS == PS_WMT) - { -/* - if(PS_WMS == 0) - { - uv = frac(uv); - } - else if(PS_WMS == 1) - { - uv = saturate(uv); - } - else -*/ - if(PS_WMS == 2) - { - uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); - } - else if(PS_WMS == 3) - { - #if SHADER_MODEL >= 0x400 - uv = (float4)(((int4)(uv * WH.xyxy) & MskFix.xyxy) | MskFix.zwzw) / WH.xyxy; - #elif SHADER_MODEL <= 0x300 - uv.x = tex1D(UMSKFIX, uv.x); - uv.y = tex1D(VMSKFIX, uv.y); - uv.z = tex1D(UMSKFIX, uv.z); - uv.w = tex1D(VMSKFIX, uv.w); - #endif - } - } - else - { -/* - if(PS_WMS == 0) - { - uv.xz = frac(uv.xz); - } - else if(PS_WMS == 1) - { - uv.xz = saturate(uv.xz); - } - else -*/ - if(PS_WMS == 2) - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - else if(PS_WMS == 3) - { - #if SHADER_MODEL >= 0x400 - uv.xz = (float2)(((int2)(uv.xz * WH.xx) & MskFix.xx) | MskFix.zz) / WH.xx; - #elif SHADER_MODEL <= 0x300 - uv.x = tex1D(UMSKFIX, uv.x); - uv.z = tex1D(UMSKFIX, uv.z); - #endif - } -/* - if(PS_WMT == 0) - { - uv.yw = frac(uv.yw); - } - else if(PS_WMT == 1) - { - uv.yw = saturate(uv.yw); - } - else -*/ - if(PS_WMT == 2) - { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - } - else if(PS_WMT == 3) - { - #if SHADER_MODEL >= 0x400 - uv.yw = 
(float2)(((int2)(uv.yw * WH.yy) & MskFix.yy) | MskFix.ww) / WH.yy; - #elif SHADER_MODEL <= 0x300 - uv.y = tex1D(VMSKFIX, uv.y); - uv.w = tex1D(VMSKFIX, uv.w); - #endif - } - } - - return uv; -} - -float2 clampuv(float2 uv) -{ - if(PS_WMS == 2 && PS_WMT == 2) - { - uv = clamp(uv, MinF, MinMax.zw); - } - else if(PS_WMS == 2) - { - uv.x = clamp(uv.x, MinF.x, MinMax.z); - } - else if(PS_WMT == 2) - { - uv.y = clamp(uv.y, MinF.y, MinMax.w); - } - - return uv; -} - -float4x4 sample_4c(float4 uv) -{ - float4x4 c; - - c[0] = sample_c(uv.xy); - c[1] = sample_c(uv.zy); - c[2] = sample_c(uv.xw); - c[3] = sample_c(uv.zw); - - return c; -} - -float4 sample_4a(float4 uv) -{ - float4 c; - - c.x = sample_c(uv.xy).a; - c.y = sample_c(uv.zy).a; - c.z = sample_c(uv.xw).a; - c.w = sample_c(uv.zw).a; - - #if SHADER_MODEL <= 0x300 - if(PS_RT) c *= 128.0f / 255; - #endif - - return c * 255./256 + 0.5/256; -} - -float4x4 sample_4p(float4 u) -{ - float4x4 c; - - c[0] = sample_p(u.x); - c[1] = sample_p(u.y); - c[2] = sample_p(u.z); - c[3] = sample_p(u.w); - - return c; -} - -float4 sample(float2 st, float q) -{ - if(!PS_FST) st /= q; - - #if PS_TCOFFSETHACK - st += TC_OffsetHack.xy; - #endif - - float4 t; - float4x4 c; - float2 dd; - -/* - if(!PS_LTF && PS_FMT <= FMT_16 && PS_WMS < 2 && PS_WMT < 2) - { - c[0] = sample_c(st); - } -*/ - if (!PS_LTF && PS_FMT <= FMT_16 && PS_WMS < 3 && PS_WMT < 3) - { - c[0] = sample_c(clampuv(st)); - } - else - { - float4 uv; - - if(PS_LTF) - { - uv = st.xyxy + HalfTexel; - dd = frac(uv.xy * WH.zw); - } - else - { - uv = st.xyxy; - } - - uv = wrapuv(uv); - - if(PS_FMT & FMT_PAL) - { - c = sample_4p(sample_4a(uv)); - } - else - { - c = sample_4c(uv); - } - } - - [unroll] - for (uint i = 0; i < 4; i++) - { - if((PS_FMT & ~FMT_PAL) == FMT_32) - { - #if SHADER_MODEL <= 0x300 - if(PS_RT) c[i].a *= 128.0f / 255; - #endif - } - else if((PS_FMT & ~FMT_PAL) == FMT_24) - { - c[i].a = !PS_AEM || any(c[i].rgb) ? 
TA.x : 0; - } - else if((PS_FMT & ~FMT_PAL) == FMT_16) - { - c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(c[i].rgb) ? TA.x : 0; - } - } - - if(PS_LTF) - { - t = lerp(lerp(c[0], c[1], dd.x), lerp(c[2], c[3], dd.x), dd.y); - } - else - { - t = c[0]; - } - - return t; -} - -float4 tfx(float4 t, float4 c) -{ - if(PS_TFX == 0) - { - if(PS_TCC) - { - c = c * t * 255.0f / 128; - } - else - { - c.rgb = c.rgb * t.rgb * 255.0f / 128; - } - } - else if(PS_TFX == 1) - { - if(PS_TCC) - { - c = t; - } - else - { - c.rgb = t.rgb; - } - } - else if(PS_TFX == 2) - { - c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a; - - if(PS_TCC) - { - c.a += t.a; - } - } - else if(PS_TFX == 3) - { - c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a; - - if(PS_TCC) - { - c.a = t.a; - } - } - - return saturate(c); -} - -void datst(PS_INPUT input) -{ -#if PS_DATE > 0 - float alpha = sample_rt(input.tp.xy).a; -#if SHADER_MODEL >= 0x400 - float alpha0x80 = 128. / 255; -#else - float alpha0x80 = 1; -#endif - - if (PS_DATE == 1 && alpha >= alpha0x80) - discard; - else if (PS_DATE == 2 && alpha < alpha0x80) - discard; -#endif -} - -void atst(float4 c) -{ - float a = trunc(c.a * 255 + 0.01); - - if(PS_ATST == 0) // never - { - discard; - } - else if(PS_ATST == 1) // always - { - // nothing to do - } - else if(PS_ATST == 2) // l - { - #if PS_SPRITEHACK == 0 - clip(AREF - a - 0.5f); - #endif - } - else if(PS_ATST == 3) // le - { - clip(AREF - a + 0.5f); - } - else if(PS_ATST == 4) // e - { - clip(0.5f - abs(a - AREF)); - } - else if(PS_ATST == 5) // ge - { - clip(a - AREF + 0.5f); - } - else if(PS_ATST == 6) // g - { - clip(a - AREF - 0.5f); - } - else if(PS_ATST == 7) // ne - { - clip(abs(a - AREF) - 0.5f); - } -} - -float4 fog(float4 c, float f) -{ - if(PS_FOG) - { - c.rgb = lerp(FogColor, c.rgb, f); - } - - return c; -} - -float4 ps_color(PS_INPUT input) -{ - datst(input); - - float4 t = sample(input.t.xy, input.t.w); - - float4 c = tfx(t, input.c); - - atst(c); - - c = fog(c, input.t.z); - - if (PS_COLCLIP == 2) 
- { - c.rgb = 256./255. - c.rgb; - } - if (PS_COLCLIP > 0) - { - c.rgb *= c.rgb < 128./255; - } - - if(PS_CLR1) // needed for Cd * (As/Ad/F + 1) blending modes - { - c.rgb = 1; - } - - return c; -} - -#if SHADER_MODEL >= 0x400 - -VS_OUTPUT vs_main(VS_INPUT input) -{ - if(VS_BPPZ == 1) // 24 - { - input.z = input.z & 0xffffff; - } - else if(VS_BPPZ == 2) // 16 - { - input.z = input.z & 0xffff; - } - - VS_OUTPUT output; - - // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) - // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty - // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel - // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 - - float4 p = float4(input.p, input.z, 0) - float4(0.05f, 0.05f, 0, 0); - - output.p = p * VertexScale - VertexOffset; -#if VS_RTCOPY - output.tp = (p * VertexScale - VertexOffset) * float4(0.5, -0.5, 0, 0) + 0.5; -#endif - - if(VS_TME) - { - if(VS_FST) - { - output.t.xy = input.uv * TextureScale; - output.t.w = 1.0f; - } - else - { - output.t.xy = input.st; - output.t.w = input.q; - } - } - else - { - output.t.xy = 0; - output.t.w = 1.0f; - } - - output.c = input.c; - output.t.z = input.f.r; - - return output; -} - -#if GS_PRIM == 0 - -[maxvertexcount(1)] -void gs_main(point VS_OUTPUT input[1], inout PointStream stream) -{ - stream.Append(input[0]); -} - -#elif GS_PRIM == 1 - -[maxvertexcount(2)] -void gs_main(line VS_OUTPUT input[2], inout LineStream stream) -{ - #if GS_IIP == 0 - input[0].c = input[1].c; - #endif - - stream.Append(input[0]); - stream.Append(input[1]); -} - -#elif GS_PRIM == 2 - -[maxvertexcount(3)] -void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream stream) -{ - #if GS_IIP == 0 - input[0].c = input[2].c; - input[1].c = input[2].c; - #endif - - stream.Append(input[0]); - 
stream.Append(input[1]); - stream.Append(input[2]); -} - -#elif GS_PRIM == 3 - -[maxvertexcount(4)] -void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream) -{ - input[0].p.z = input[1].p.z; - input[0].t.zw = input[1].t.zw; - - #if GS_IIP == 0 - input[0].c = input[1].c; - #endif - - VS_OUTPUT lb = input[1]; - - lb.p.x = input[0].p.x; - lb.t.x = input[0].t.x; - - VS_OUTPUT rt = input[1]; - - rt.p.y = input[0].p.y; - rt.t.y = input[0].t.y; - - stream.Append(input[0]); - stream.Append(lb); - stream.Append(rt); - stream.Append(input[1]); -} - -#endif - -PS_OUTPUT ps_main(PS_INPUT input) -{ - float4 c = ps_color(input); - - PS_OUTPUT output; - - if (PS_SHUFFLE){ - uint4 denorm_c = uint4(c * 255.0f + 0.5f); - uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); - - // Mask will take care of the correct destination - if (PS_READ_BA){ - c.rb = c.bb; - } - else { - c.rb = c.rr; - } - c.g = c.a; - if (PS_READ_BA){ - if (denorm_c.a & 0x80) - c.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; - else - c.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; - - //c.g = c.a; - } - else { - if (denorm_c.g & 0x80) - c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; - else - c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; - - //c.g = c.a; - } - //Probably not right :/ - //c.g = c.b; - } - - output.c1 = c.a * 2; // used for alpha blending - - if(PS_AOUT) // 16 bit output - { - float a = 128.0f / 255; // alpha output will be 0x80 - - c.a = PS_FBA ? 
a : step(0.5, c.a) * a; - } - else if(PS_FBA) - { - if(c.a < 0.5) c.a += 0.5; - } - - output.c0 = c; - - return output; -} - -#elif SHADER_MODEL <= 0x300 - -VS_OUTPUT vs_main(VS_INPUT input) -{ - if(VS_BPPZ == 1) // 24 - { - input.p.z = fmod(input.p.z, 0x1000000); - } - else if(VS_BPPZ == 2) // 16 - { - input.p.z = fmod(input.p.z, 0x10000); - } - - VS_OUTPUT output; - - // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) - // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty - // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel - // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 - - float4 p = input.p - float4(0.05f, 0.05f, 0, 0); - - output.p = p * VertexScale - VertexOffset; -#if VS_RTCOPY - output.tp = (p * VertexScale - VertexOffset) * float4(0.5, -0.5, 0, 0) + 0.5; -#endif - - if(VS_LOGZ) - { - output.p.z = log2(1.0f + input.p.z) / 32; - } - - if(VS_TME) - { - if(VS_FST) - { - output.t.xy = input.t * TextureScale; - output.t.w = 1.0f; - } - else - { - output.t.xy = input.t; - output.t.w = input.p.w; - } - } - else - { - output.t.xy = 0; - output.t.w = 1.0f; - } - - output.c = input.c; - output.t.z = input.f.b; - - return output; -} - -float4 ps_main(PS_INPUT input) : COLOR -{ - float4 c = ps_color(input); - - c.a *= 2; - - return c; -} - -#endif -#endif diff --git a/plugins/GSdx_legacy/resource.h b/plugins/GSdx_legacy/resource.h deleted file mode 100644 index 19e54c8d19..0000000000 --- a/plugins/GSdx_legacy/resource.h +++ /dev/null @@ -1,115 +0,0 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. 
-// Used by GSdx.rc -// - -#define IDC_PALTEX 2001 -#define IDC_LOGZ 2002 -#define IDC_CODECS 2003 -#define IDC_RESOLUTION 2004 -#define IDC_RESX_EDIT 2005 -#define IDC_RESY_EDIT 2006 -#define IDC_AA1 2007 -#define IDC_SWTHREADS_TEXT 2008 -#define IDC_SWTHREADS 2009 -#define IDC_SWTHREADS_EDIT 2010 -#define IDC_FILTER_TEXT 2011 -#define IDC_FILTER 2012 -#define IDC_DITHERING 2013 -#define IDC_RESX 2014 -#define IDC_RESY 2015 -#define IDD_CONFIG 2016 -#define IDB_LOGO9 2017 -#define IDB_LOGO10 2018 -#define IDB_LOGOGL 2019 -#define IDC_FBA 2020 -#define IDC_LOGO9 2021 -#define IDC_LOGO11 2022 -#define IDC_LOGOGL 2023 -#define IDD_CAPTURE 2024 -#define IDD_GPUCONFIG 2025 -#define IDC_RENDERER 2026 -#define IDC_INTERLACE 2027 -#define IDC_ASPECTRATIO 2028 -#define IDC_ALPHAHACK 2029 -#define IDC_SCALE 2030 -#define IDC_UPSCALE_MULTIPLIER 2031 -#define IDC_BROWSE 2032 -#define IDC_OFFSETHACK 2033 -#define IDC_FILENAME 2034 -#define IDC_SKIPDRAWHACK 2035 -#define IDC_WIDTH 2036 -#define IDC_HEIGHT 2037 -#define IDC_CONFIGURE 2038 -#define IDC_ACCURATE_BLEND_UNIT_TEXT 2039 -#define IDC_WINDOWED 2040 -#define IDC_SKIPDRAWHACKEDIT 2041 -#define IDC_SPRITEHACK 2042 -#define IDC_SATURATION_SLIDER 2043 -#define IDC_BRIGHTNESS_SLIDER 2044 -#define IDC_CONTRAST_SLIDER 2045 -#define IDC_SHADEBUTTON 2046 -#define IDC_SHADEBOOST 2047 -#define IDC_HACKS_ENABLED 2048 -#define IDC_SATURATION_TEXT 2049 -#define IDC_BRIGHTNESS_TEXT 2050 -#define IDC_CONTRAST_TEXT 2051 -#define IDC_MSAACB 2052 -#define IDC_MSAA_TEXT 2053 -#define IDC_HACKSBUTTON 2054 -#define IDC_WILDHACK 2055 -#define IDC_CHECK_DISABLE_ALL_HACKS 2056 -#define IDC_ALPHASTENCIL 2057 -#define IDC_ADAPTER 2058 -#define IDC_TCOFFSETX 2059 -#define IDC_TCOFFSETX2 2060 -#define IDC_TCOFFSETY 2061 -#define IDC_TCOFFSETY2 2062 -#define IDC_FXAA 2063 -#define IDC_SHADER_FX 2064 -#define IDC_AFCOMBO_TEXT 2065 -#define IDC_AFCOMBO 2066 -#define IDC_OPENCL_DEVICE 2067 -#define IDC_OPENCL_TEXT 2068 -#define IDC_ACCURATE_BLEND_UNIT 
2069 -#define IDC_ACCURATE_DATE 2070 -#define IDC_ROUND_SPRITE 2071 -#define IDC_ALIGN_SPRITE 2072 -#define IDC_CRC_LEVEL 2073 -#define IDC_CRC_LEVEL_TEXT 2074 -#define IDC_TC_DEPTH 2075 -#define IDC_COLORSPACE 2076 -#define IDC_SHADER_FX_EDIT 2077 -#define IDC_SHADER_FX_CONF_EDIT 2078 -#define IDC_SHADER_FX_BUTTON 2079 -#define IDC_SHADER_FX_CONF_BUTTON 2080 -#define IDC_SHADER_FX_TEXT 2081 -#define IDC_SHADER_FX_CONF_TEXT 2082 -#define IDC_CUSTOM_TEXT 2083 -#define IDC_UPSCALE_MULTIPLIER_TEXT 2084 -#define IDC_MIPMAP 2085 -#define IDC_PRELOAD_GS 2086 -#define IDC_TVSHADER 2087 -#define IDC_SAFE_FBMASK 2088 -#define IDR_CONVERT_FX 10000 -#define IDR_TFX_FX 10001 -#define IDR_MERGE_FX 10002 -#define IDR_INTERLACE_FX 10003 -#define IDR_FXAA_FX 10004 -#define IDR_CS_FX 10005 -#define IDD_SHADER 10006 -#define IDR_SHADEBOOST_FX 10007 -#define IDR_TFX_CL 10008 -#define IDD_HACKS 10009 -#define IDC_STATIC -1 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 10013 -#define _APS_NEXT_COMMAND_VALUE 32771 -#define _APS_NEXT_CONTROL_VALUE 2091 -#define _APS_NEXT_SYMED_VALUE 5000 -#endif -#endif diff --git a/plugins/GSdx_legacy/stdafx.cpp b/plugins/GSdx_legacy/stdafx.cpp deleted file mode 100644 index b36ceb77cb..0000000000 --- a/plugins/GSdx_legacy/stdafx.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -// stdafx.cpp : source file that includes just the standard includes -// GSdx.pch will be the pre-compiled header -// stdafx.obj will contain the pre-compiled type information - -#include "stdafx.h" - -// TODO: reference any additional headers you need in STDAFX.H -// and not in this file - -string format(const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - - int result = -1, length = 256; - - char* buffer = NULL; - - while(result == -1) - { - if(buffer) delete [] buffer; - - buffer = new char[length + 1]; - - memset(buffer, 0, length + 1); - - result = vsnprintf(buffer, length, fmt, args); - - length *= 2; - } - - va_end(args); - - string s(buffer); - - delete [] buffer; - - return s; -} - -#ifdef _WIN32 - -void* vmalloc(size_t size, bool code) -{ - return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, code ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE); -} - -void vmfree(void* ptr, size_t size) -{ - VirtualFree(ptr, 0, MEM_RELEASE); -} - -#else - -#include -#include - -void* vmalloc(size_t size, bool code) -{ - size_t mask = getpagesize() - 1; - - size = (size + mask) & ~mask; - - int flags = PROT_READ | PROT_WRITE; - - if(code) - { - flags |= PROT_EXEC; - } - - return mmap(NULL, size, flags, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -} - -void vmfree(void* ptr, size_t size) -{ - size_t mask = getpagesize() - 1; - - size = (size + mask) & ~mask; - - munmap(ptr, size); -} - -#endif - -#if !defined(_MSC_VER) - -// declare linux equivalents (alignment must be power of 2 (1,2,4...2^15) - -#if !defined(__USE_ISOC11) || defined(ASAN_WORKAROUND) - -void* _aligned_malloc(size_t size, size_t alignment) -{ - void *ret = 0; - posix_memalign(&ret, alignment, size); - return ret; -} - -#endif - -#endif diff --git a/plugins/GSdx_legacy/stdafx.h b/plugins/GSdx_legacy/stdafx.h deleted file mode 100644 index 07f55cfcd7..0000000000 --- a/plugins/GSdx_legacy/stdafx.h +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - */ - -// stdafx.h : include file for standard system include files, -// or project specific include files that are used frequently, but -// are changed infrequently - -#pragma once - -#include "config.h" - -#ifdef _WIN32 - -#include "targetver.h" - -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define D3DCOLORWRITEENABLE_RGBA (D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA) - -#endif - - -#ifdef ENABLE_OPENCL - -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS -#define __CL_ENABLE_EXCEPTIONS -#include - -#endif - -// put these into vc9/common7/ide/usertype.dat to have them highlighted - -typedef unsigned char uint8; -typedef signed char int8; -typedef unsigned short uint16; -typedef signed short int16; -typedef unsigned int uint32; -typedef signed int int32; -typedef unsigned long long uint64; -typedef signed long long int64; -#ifdef __x86_64__ -typedef uint64 uptr; -#else -typedef uint32 uptr; -#endif - - -// xbyak compatibilities -typedef int64 sint64; -#define MIE_INTEGER_TYPE_DEFINED - -// stdc - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -#include - -#include - -#if _MSC_VER >= 1800 || !defined(_WIN32) -#include -#include -#define hash_map unordered_map -#define hash_set unordered_set -#else -#include -#include -using namespace stdext; -#endif - -#ifdef _WIN32 - - // Note use GL/glcorearb.h on the future - #include - #include - #include - #include "GLLoader.h" - - // hashing algoritms at: http://www.cris.com/~Ttwang/tech/inthash.htm - // default hash_compare does ldiv and other crazy stuff to reduce speed - - template<> class hash_compare - { - 
public: - enum {bucket_size = 1}; - - size_t operator()(uint32 key) const - { - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - - return (size_t)key; - } - - bool operator()(uint32 a, uint32 b) const - { - return a < b; - } - }; - - template<> class hash_compare - { - public: - enum {bucket_size = 1}; - - size_t operator()(uint64 key) const - { - key += ~(key << 32); - key ^= (key >> 22); - key += ~(key << 13); - key ^= (key >> 8); - key += (key << 3); - key ^= (key >> 15); - key += ~(key << 27); - key ^= (key >> 31); - - return (size_t)key; - } - - bool operator()(uint64 a, uint64 b) const - { - return a < b; - } - }; - - #define vsnprintf _vsnprintf - #define snprintf _snprintf - - #define DIRECTORY_SEPARATOR '\\' - -#else - - // Note use GL/glcorearb.h on the future - #include - #include - #include "GLLoader.h" - - #include // mkdir - - #define DIRECTORY_SEPARATOR '/' - -#endif - -#ifdef _MSC_VER - - #define __aligned(t, n) __declspec(align(n)) t - - #define EXPORT_C_(type) extern "C" type __stdcall - #define EXPORT_C EXPORT_C_(void) - - #define ALIGN_STACK(n) __aligned(int, n) __dummy; - -#else - - #define __aligned(t, n) t __attribute__((aligned(n))) - #define __fastcall __attribute__((fastcall)) - - #define EXPORT_C_(type) extern "C" __attribute__((stdcall,externally_visible,visibility("default"))) type - #define EXPORT_C EXPORT_C_(void) - - #ifdef __GNUC__ - - #include "assert.h" - #define __forceinline __inline__ __attribute__((always_inline,unused)) - // #define __forceinline __inline__ __attribute__((__always_inline__,__gnu_inline__)) - #define __assume(c) do { if (!(c)) __builtin_unreachable(); } while(0) - - // GCC removes the variable as dead code and generates some warnings. 
- // Stack is automatically realigned due to SSE/AVX operations - #define ALIGN_STACK(n) (void)0; - - #else - - // TODO Check clang behavior - #define ALIGN_STACK(n) __aligned(int, n) __dummy; - - #endif - - -#endif - -extern string format(const char* fmt, ...); - -struct delete_object {template void operator()(T& p) {delete p;}}; -struct delete_first {template void operator()(T& p) {delete p.first;}}; -struct delete_second {template void operator()(T& p) {delete p.second;}}; -struct aligned_free_object {template void operator()(T& p) {_aligned_free(p);}}; -struct aligned_free_first {template void operator()(T& p) {_aligned_free(p.first);}}; -struct aligned_free_second {template void operator()(T& p) {_aligned_free(p.second);}}; - -#define countof(a) (sizeof(a) / sizeof(a[0])) - -#ifndef RESTRICT - - #ifdef __INTEL_COMPILER - - #define RESTRICT restrict - - #elif defined(_MSC_VER) - - #define RESTRICT __restrict - - #elif defined(__GNUC__) - - #define RESTRICT __restrict__ - - #else - - #define RESTRICT - - #endif - -#endif - -#if defined(_DEBUG) //&& defined(_MSC_VER) - - #include - #define ASSERT assert - -#else - - #define ASSERT(exp) ((void)0) - -#endif - -#ifdef __x86_64__ - - #define _M_AMD64 - -#endif - -// sse -#if defined(__GNUC__) && !defined(__x86_64__) -// Convert gcc see define into GSdx (windows) define -#if defined(__AVX2__) - #define _M_SSE 0x501 -#elif defined(__AVX__) - #define _M_SSE 0x500 -#elif defined(__SSE4_2__) - #define _M_SSE 0x402 -#elif defined(__SSE4_1__) - #define _M_SSE 0x401 -#elif defined(__SSSE3__) - #define _M_SSE 0x301 -#elif defined(__SSE2__) - #define _M_SSE 0x200 -#elif defined(__SSE__) - #define _M_SSE 0x100 -#endif - -#endif - -#if !defined(_M_SSE) && (!defined(_WIN32) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2) - - #define _M_SSE 0x200 - -#endif - -#if _M_SSE >= 0x200 - - #include - #include - - #ifndef _MM_DENORMALS_ARE_ZERO - #define _MM_DENORMALS_ARE_ZERO 0x0040 - #endif - - #define MXCSR 
(_MM_DENORMALS_ARE_ZERO | _MM_MASK_MASK | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON) - - #define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \ - { \ - __m128 tmp0 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0x44); \ - __m128 tmp2 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0xEE); \ - __m128 tmp1 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0x44); \ - __m128 tmp3 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0xEE); \ - (row0) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0x88)); \ - (row1) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0xDD)); \ - (row2) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0x88)); \ - (row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \ - } - -#else - -#error TODO: GSVector4 and GSRasterizer needs SSE2 - -#endif - -#if _M_SSE >= 0x301 - - #include - -#endif - -#if _M_SSE >= 0x401 - - #include - -#endif - -#if _M_SSE >= 0x500 - - #include - -#endif - -#undef min -#undef max -#undef abs - -#if !defined(_MSC_VER) - #if defined(__USE_ISOC11) && !defined(ASAN_WORKAROUND) // not supported yet on gcc 4.9 - - #define _aligned_malloc(size, a) aligned_alloc(a, size) - - #else - - extern void* _aligned_malloc(size_t size, size_t alignment); - - #endif - - static inline void _aligned_free(void* p) { - free(p); - } - - // http://svn.reactos.org/svn/reactos/trunk/reactos/include/crt/mingw32/intrin_x86.h?view=markup - - __forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask) - { - __asm__("bsfl %k[Mask], %k[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask)); - - return Mask ? 1 : 0; - } - - #ifdef __GNUC__ - - // gcc 4.8 define __rdtsc but unfortunately the compiler crash... 
- // The redefine allow to skip the gcc __rdtsc version -- Gregory - #define __rdtsc _lnx_rdtsc - //__forceinline unsigned long long __rdtsc() - __forceinline unsigned long long _lnx_rdtsc() - { - #if defined(__amd64__) || defined(__x86_64__) - unsigned long long low, high; - __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high)); - return low | (high << 32); - #else - unsigned long long retval; - __asm__ __volatile__("rdtsc" : "=A"(retval)); - return retval; - #endif - } - - #endif - -#endif - -extern void* vmalloc(size_t size, bool code); -extern void vmfree(void* ptr, size_t size); - -#ifdef _WIN32 - - #ifdef ENABLE_VTUNE - - #include - - #pragma comment(lib, "jitprofiling.lib") - - #endif - -#endif - -#define GL_INSERT(type, code, sev, ...) \ - do if (glDebugMessageInsert) glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, type, code, sev, -1, format(__VA_ARGS__).c_str()); while(0); - -// Except apple any sane driver support this extension -#if defined(_DEBUG) -#define GL_CACHE(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xFEAD, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) -#else -#define GL_CACHE(...) (0); -#endif - -#if defined(ENABLE_OGL_DEBUG) -#define GL_PUSH(...) do if (glPushDebugGroup) glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0xBAD, -1, format(__VA_ARGS__).c_str()); while(0); -#define GL_POP() do if (glPopDebugGroup) glPopDebugGroup(); while(0); -#define GL_INS(...) GL_INSERT(GL_DEBUG_TYPE_ERROR, 0xDEAD, GL_DEBUG_SEVERITY_MEDIUM, __VA_ARGS__) -#define GL_PERF(...) GL_INSERT(GL_DEBUG_TYPE_PERFORMANCE, 0xFEE1, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) -#else -#define GL_PUSH(...) (0); -#define GL_POP() (0); -#define GL_INS(...) (0); -#define GL_PERF(...) 
(0); -#endif - -// Helper path to dump texture -#ifdef _WIN32 -const std::string root_sw("c:\\temp1\\_"); -const std::string root_hw("c:\\temp2\\_"); -#else -const std::string root_sw("/tmp/GS_SW_dump/"); -const std::string root_hw("/tmp/GS_HW_dump/"); -#endif diff --git a/plugins/GSdx_legacy/targetver.h b/plugins/GSdx_legacy/targetver.h deleted file mode 100644 index ba3f81934a..0000000000 --- a/plugins/GSdx_legacy/targetver.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#pragma once - -#define _WIN32_WINNT 0x0600 diff --git a/plugins/GSdx_legacy/vsprops/ProjectRootDir.props b/plugins/GSdx_legacy/vsprops/ProjectRootDir.props deleted file mode 100644 index b78b467682..0000000000 --- a/plugins/GSdx_legacy/vsprops/ProjectRootDir.props +++ /dev/null @@ -1,26 +0,0 @@ - - - - $(ProjectDir). - $(ProjectRootDir)\..\.. 
- $(SvnRootDir)\common - plugins - - - <_ProjectFileVersion>10.0.30128.1 - - - - $(ProjectRootDir) - - - $(SvnRootDir) - - - $(SvnCommonDir) - - - $(PcsxSubsection) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/avx2.props b/plugins/GSdx_legacy/vsprops/avx2.props deleted file mode 100644 index 928fc5b3ed..0000000000 --- a/plugins/GSdx_legacy/vsprops/avx2.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - AVX2 - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x501;%(PreprocessorDefinitions) - AdvancedVectorExtensions - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/avx_vs10.props b/plugins/GSdx_legacy/vsprops/avx_vs10.props deleted file mode 100644 index 876419d57e..0000000000 --- a/plugins/GSdx_legacy/vsprops/avx_vs10.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - AVX - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x500;%(PreprocessorDefinitions) - StreamingSIMDExtensions2 - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/avx_vs2012.props b/plugins/GSdx_legacy/vsprops/avx_vs2012.props deleted file mode 100644 index 42d81e1764..0000000000 --- a/plugins/GSdx_legacy/vsprops/avx_vs2012.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - AVX - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x500;%(PreprocessorDefinitions) - AdvancedVectorExtensions - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/avx_vs2013.props b/plugins/GSdx_legacy/vsprops/avx_vs2013.props deleted file mode 100644 index 42d81e1764..0000000000 --- a/plugins/GSdx_legacy/vsprops/avx_vs2013.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - AVX - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x500;%(PreprocessorDefinitions) - AdvancedVectorExtensions - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/common.props b/plugins/GSdx_legacy/vsprops/common.props deleted file mode 100644 index 
441217cc07..0000000000 --- a/plugins/GSdx_legacy/vsprops/common.props +++ /dev/null @@ -1,31 +0,0 @@ - - - - <_ProjectFileVersion>10.0.30128.1 - $(SolutionDir)bin\$(PcsxSubsection)\ - $(PlatformName)\$(Configuration)\ - - - - true - Fast - false - Level4 - ProgramDatabase - 4456;4458;4996;4995;4324;4100;4101;4201;4556;4127;4512;%(DisableSpecificWarnings) - $(VTUNE_AMPLIFIER_XE_2015_DIR)include;$(SolutionDir)3rdparty\baseclasses;$(SolutionDir)3rdparty;$(SolutionDir)3rdparty\libpng;$(SolutionDir)3rdparty\opencl;$(SolutionDir)3rdparty\zlib;%(AdditionalIncludeDirectories) - true - - - d3d11.lib;d3d9.lib;dxgi.lib;dxguid.lib;winmm.lib;strmiids.lib;opengl32.lib;comsuppw.lib;comctl32.lib;%(AdditionalDependencies) - d3d9.dll;d3d11.dll;dxgi.dll;opengl32.dll;%(DelayLoadDLLs) - true - Windows - false - $(VTUNE_AMPLIFIER_XE_2015_DIR)lib32;%(AdditionalLibraryDirectories) - - - "$(SolutionDir)common\vsprops\preBuild.cmd" - - - diff --git a/plugins/GSdx_legacy/vsprops/debug.props b/plugins/GSdx_legacy/vsprops/debug.props deleted file mode 100644 index 197ceaf5ce..0000000000 --- a/plugins/GSdx_legacy/vsprops/debug.props +++ /dev/null @@ -1,16 +0,0 @@ - - - - <_ProjectFileVersion>10.0.30128.1 - $(ProjectName)$(PlatformArchitecture)-$(SSEtype)-dbg - true - - - - Disabled - _DEBUG;%(PreprocessorDefinitions) - StackFrameRuntimeCheck - MultiThreadedDebugDLL - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/release.props b/plugins/GSdx_legacy/vsprops/release.props deleted file mode 100644 index 595be69332..0000000000 --- a/plugins/GSdx_legacy/vsprops/release.props +++ /dev/null @@ -1,26 +0,0 @@ - - - - <_ProjectFileVersion>10.0.30128.1 - false - $(ProjectName)$(PlatformArchitecture)-$(SSEtype) - - - - MaxSpeed - AnySuitable - true - Speed - true - true - NDEBUG;_SECURE_SCL=0;%(PreprocessorDefinitions) - true - MultiThreadedDLL - false - - - true - true - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/sse2.props 
b/plugins/GSdx_legacy/vsprops/sse2.props deleted file mode 100644 index 97cc8ed3a8..0000000000 --- a/plugins/GSdx_legacy/vsprops/sse2.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - SSE2 - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x200;%(PreprocessorDefinitions) - StreamingSIMDExtensions2 - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/sse4.props b/plugins/GSdx_legacy/vsprops/sse4.props deleted file mode 100644 index 493174b82f..0000000000 --- a/plugins/GSdx_legacy/vsprops/sse4.props +++ /dev/null @@ -1,20 +0,0 @@ - - - - SSE4 - - - <_ProjectFileVersion>10.0.30128.1 - - - - _M_SSE=0x401;%(PreprocessorDefinitions) - StreamingSIMDExtensions2 - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/ssse3.props b/plugins/GSdx_legacy/vsprops/ssse3.props deleted file mode 100644 index cb14dd851b..0000000000 --- a/plugins/GSdx_legacy/vsprops/ssse3.props +++ /dev/null @@ -1,21 +0,0 @@ - - - - SSSE3 - - - <_ProjectFileVersion>10.0.30128.1 - <_PropertySheetDisplayName>sse3 - - - - _M_SSE=0x301;%(PreprocessorDefinitions) - StreamingSIMDExtensions2 - - - - - $(SSEtype) - - - \ No newline at end of file diff --git a/plugins/GSdx_legacy/vsprops/x64.props b/plugins/GSdx_legacy/vsprops/x64.props deleted file mode 100644 index 55af6278c1..0000000000 --- a/plugins/GSdx_legacy/vsprops/x64.props +++ /dev/null @@ -1,8 +0,0 @@ - - - - - $(ProjectDir)vtune\x64;%(AdditionalLibraryDirectories) - - - diff --git a/plugins/GSdx_legacy/vsprops/x86.props b/plugins/GSdx_legacy/vsprops/x86.props deleted file mode 100644 index c6dd1d02de..0000000000 --- a/plugins/GSdx_legacy/vsprops/x86.props +++ /dev/null @@ -1,8 +0,0 @@ - - - - - $(ProjectDir)vtune\x86;%(AdditionalLibraryDirectories) - - - diff --git a/plugins/GSdx_legacy/xbyak/xbyak.h b/plugins/GSdx_legacy/xbyak/xbyak.h deleted file mode 100644 index 4f0f85e8bf..0000000000 --- a/plugins/GSdx_legacy/xbyak/xbyak.h +++ /dev/null @@ -1,2156 +0,0 @@ -/* 
Copyright (c) 2007 MITSUNARI Shigeo -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* -* Redistributions of source code must retain the above copyright notice, this -* list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, -* this list of conditions and the following disclaimer in the documentation -* and/or other materials provided with the distribution. -* Neither the name of the copyright owner nor the names of its contributors may -* be used to endorse or promote products derived from this software without -* specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -* THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once -#ifndef XBYAK_XBYAK_H_ -#define XBYAK_XBYAK_H_ -/*! 
- @file xbyak.h - @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ - @author herumi - @url https://github.com/herumi/xbyak, http://homepage1.nifty.com/herumi/soft/xbyak_e.html - @note modified new BSD license - http://opensource.org/licenses/BSD-3-Clause -*/ -#ifndef XBYAK_NO_OP_NAMES - #if not +0 // trick to detect whether 'not' is operator or not - #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." - #endif -#endif - -#include // for debug print -#include -#include -#include -#include -#ifndef NDEBUG -#include -#endif - -//#define XBYAK_USE_MMAP_ALLOCATOR -#if !defined(__GNUC__) || defined(__MINGW32__) - #undef XBYAK_USE_MMAP_ALLOCATOR -#endif - -// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. -#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ - ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) - #include - #define XBYAK_STD_UNORDERED_MAP std::unordered_map - #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap - -// Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using -// libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). -// These headers have been expanded/fixed in various forks. -// In F.S.F. 'real' GCC, issues with the tr headers were resolved in GCC 4.5. 
-#elif defined(__GNUC__) && (__GNUC__ >= 4) && ((__GNUC_MINOR__ >= 5) || \ - ((__GLIBCXX__ >= 20070719) && (__GNUC_MINOR__ >= 2) && \ - (defined(__INTEL_COMPILER) || defined(__llvm__)))) - #include - #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map - #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap - -#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) - #include - #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map - #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap - -#else - #include - #define XBYAK_STD_UNORDERED_MAP std::map - #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap -#endif -#ifdef _WIN32 - #include - #include -#elif defined(__GNUC__) - #include - #include - #include -#endif -#if !defined(_MSC_VER) || (_MSC_VER >= 1600) - #include -#endif - -#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__)) - #define XBYAK64_WIN -#elif defined(__x86_64__) - #define XBYAK64_GCC -#endif -#if !defined(XBYAK64) && !defined(XBYAK32) - #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN) - #define XBYAK64 - #else - #define XBYAK32 - #endif -#endif - -#if (__cplusplus >= 201103) || (_MSC_VER >= 1800) - #define XBYAK_VARIADIC_TEMPLATE -#endif - -#ifdef _MSC_VER - #pragma warning(push) - #pragma warning(disable : 4514) /* remove inline function */ - #pragma warning(disable : 4786) /* identifier is too long */ - #pragma warning(disable : 4503) /* name is too long */ - #pragma warning(disable : 4127) /* constant expresison */ -#endif - -namespace Xbyak { - -#include "xbyak_bin2hex.h" - -enum { - DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x4840 /* 0xABCD = A.BC(D) */ -}; - -#ifndef MIE_INTEGER_TYPE_DEFINED -#define MIE_INTEGER_TYPE_DEFINED -#ifdef _MSC_VER - typedef unsigned __int64 uint64; - typedef __int64 sint64; -#else - typedef uint64_t uint64; - typedef int64_t sint64; -#endif -typedef unsigned int uint32; -typedef unsigned short uint16; -typedef unsigned char uint8; -#endif - 
-#ifndef MIE_ALIGN - #ifdef _MSC_VER - #define MIE_ALIGN(x) __declspec(align(x)) - #else - #define MIE_ALIGN(x) __attribute__((aligned(x))) - #endif -#endif -#ifndef MIE_PACK // for shufps - #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w)) -#endif - -enum { - ERR_NONE = 0, - ERR_BAD_ADDRESSING, - ERR_CODE_IS_TOO_BIG, - ERR_BAD_SCALE, - ERR_ESP_CANT_BE_INDEX, - ERR_BAD_COMBINATION, - ERR_BAD_SIZE_OF_REGISTER, - ERR_IMM_IS_TOO_BIG, - ERR_BAD_ALIGN, - ERR_LABEL_IS_REDEFINED, - ERR_LABEL_IS_TOO_FAR, - ERR_LABEL_IS_NOT_FOUND, - ERR_CODE_ISNOT_COPYABLE, - ERR_BAD_PARAMETER, - ERR_CANT_PROTECT, - ERR_CANT_USE_64BIT_DISP, - ERR_OFFSET_IS_TOO_BIG, - ERR_MEM_SIZE_IS_NOT_SPECIFIED, - ERR_BAD_MEM_SIZE, - ERR_BAD_ST_COMBINATION, - ERR_OVER_LOCAL_LABEL, // not used - ERR_UNDER_LOCAL_LABEL, - ERR_CANT_ALLOC, - ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, - ERR_BAD_PROTECT_MODE, - ERR_BAD_PNUM, - ERR_BAD_TNUM, - ERR_BAD_VSIB_ADDRESSING, - ERR_CANT_CONVERT, - ERR_LABEL_ISNOT_SET_BY_L, - ERR_LABEL_IS_ALREADY_SET_BY_L, - ERR_BAD_LABEL_STR, - ERR_MUNMAP, - ERR_INTERNAL -}; - -class Error : public std::exception { - int err_; -public: - explicit Error(int err) : err_(err) - { - if (err_ < 0 || err_ > ERR_INTERNAL) { - fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_); - exit(1); - } - } - operator int() const { return err_; } - const char *what() const throw() - { - static const char *errTbl[] = { - "none", - "bad addressing", - "code is too big", - "bad scale", - "esp can't be index", - "bad combination", - "bad size of register", - "imm is too big", - "bad align", - "label is redefined", - "label is too far", - "label is not found", - "code is not copyable", - "bad parameter", - "can't protect", - "can't use 64bit disp(use (void*))", - "offset is too big", - "MEM size is not specified", - "bad mem size", - "bad st combination", - "over local label", - "under local label", - "can't alloc", - "T_SHORT is not supported in AutoGrow", - "bad protect mode", - "bad 
pNum", - "bad tNum", - "bad vsib addressing", - "can't convert", - "label is not set by L()", - "label is already set by L()", - "bad label string", - "err munmap", - "internal error", - }; - assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); - return errTbl[err_]; - } -}; - -inline const char *ConvertErrorToString(Error err) -{ - return err.what(); -} - -inline void *AlignedMalloc(size_t size, size_t alignment) -{ -#ifdef __MINGW32__ - return __mingw_aligned_malloc(size, alignment); -#elif defined(_WIN32) - return _aligned_malloc(size, alignment); -#else - void *p; - int ret = posix_memalign(&p, alignment, size); - return (ret == 0) ? p : 0; -#endif -} - -inline void AlignedFree(void *p) -{ -#ifdef __MINGW32__ - __mingw_aligned_free(p); -#elif defined(_MSC_VER) - _aligned_free(p); -#else - free(p); -#endif -} - -template -inline const To CastTo(From p) throw() -{ - return (const To)(size_t)(p); -} -namespace inner { - -static const size_t ALIGN_PAGE_SIZE = 4096; - -inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } -inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } - -inline uint32 VerifyInInt32(uint64 x) -{ -#ifdef XBYAK64 - if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG); -#endif - return static_cast(x); -} - -enum LabelMode { - LasIs, // as is - Labs, // absolute - LaddTop // (addr + top) for mov(reg, label) with AutoGrow -}; - -} // inner - -/* - custom allocator -*/ -struct Allocator { - virtual uint8 *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } - virtual void free(uint8 *p) { AlignedFree(p); } - virtual ~Allocator() {} - /* override to return false if you call protect() manually */ - virtual bool useProtect() const { return true; } -}; - -#ifdef XBYAK_USE_MMAP_ALLOCATOR -class MmapAllocator : Allocator { - typedef XBYAK_STD_UNORDERED_MAP SizeList; - SizeList sizeList_; -public: - uint8 *alloc(size_t size) - { - const size_t alignedSizeM1 
= inner::ALIGN_PAGE_SIZE - 1; - size = (size + alignedSizeM1) & ~alignedSizeM1; -#ifdef MAP_ANONYMOUS - const int mode = MAP_PRIVATE | MAP_ANONYMOUS; -#elif defined(MAP_ANON) - const int mode = MAP_PRIVATE | MAP_ANON; -#else - #error "not supported" -#endif - void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0); - if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC); - assert(p); - sizeList_[(uintptr_t)p] = size; - return (uint8*)p; - } - void free(uint8 *p) - { - if (p == 0) return; - SizeList::iterator i = sizeList_.find((uintptr_t)p); - if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER); - if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP); - sizeList_.erase(i); - } -}; -#endif - -class Operand { -private: - uint8 idx_; // 0..15, MSB = 1 if spl/bpl/sil/dil - uint8 kind_; - uint16 bit_; -public: - enum Kind { - NONE = 0, - MEM = 1 << 1, - IMM = 1 << 2, - REG = 1 << 3, - MMX = 1 << 4, - XMM = 1 << 5, - FPU = 1 << 6, - YMM = 1 << 7 - }; - enum Code { -#ifdef XBYAK64 - RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, - R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D, - R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W, - R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B, - SPL = 4, BPL, SIL, DIL, -#endif - EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - AX = 0, CX, DX, BX, SP, BP, SI, DI, - AL = 0, CL, DL, BL, AH, CH, DH, BH - }; - Operand() : idx_(0), kind_(0), bit_(0) { } - Operand(int idx, Kind kind, int bit, bool ext8bit = 0) - : idx_(static_cast(idx | (ext8bit ? 
0x80 : 0))) - , kind_(static_cast(kind)) - , bit_(static_cast(bit)) - { - assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two - } - Kind getKind() const { return static_cast(kind_); } - int getIdx() const { return idx_ & 15; } - bool isNone() const { return kind_ == 0; } - bool isMMX() const { return is(MMX); } - bool isXMM() const { return is(XMM); } - bool isYMM() const { return is(YMM); } - bool isREG(int bit = 0) const { return is(REG, bit); } - bool isMEM(int bit = 0) const { return is(MEM, bit); } - bool isFPU() const { return is(FPU); } - bool isExt8bit() const { return (idx_ & 0x80) != 0; } - // ah, ch, dh, bh? - bool isHigh8bit() const - { - if (!isBit(8)) return false; - if (isExt8bit()) return false; - const int idx = getIdx(); - return AH <= idx && idx <= BH; - } - // any bit is accetable if bit == 0 - bool is(int kind, uint32 bit = 0) const - { - return (kind_ & kind) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16) - } - bool isBit(uint32 bit) const { return (bit_ & bit) != 0; } - uint32 getBit() const { return bit_; } - const char *toString() const - { - const int idx = getIdx(); - if (kind_ == REG) { - if (isExt8bit()) { - static const char *tbl[4] = { "spl", "bpl", "sil", "dil" }; - return tbl[idx - 4]; - } - static const char *tbl[4][16] = { - { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, - { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" }, - { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }, - { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }, - }; - return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 
2 : 3][idx]; - } else if (isYMM()) { - static const char *tbl[16] = { "ym0", "ym1", "ym2", "ym3", "ym4", "ym5", "ym6", "ym7", "ym8", "ym9", "ym10", "ym11", "ym12", "ym13", "ym14", "ym15" }; - return tbl[idx]; - } else if (isXMM()) { - static const char *tbl[16] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" }; - return tbl[idx]; - } else if (isMMX()) { - static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }; - return tbl[idx]; - } else if (isFPU()) { - static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" }; - return tbl[idx]; - } - throw Error(ERR_INTERNAL); - } - bool operator==(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_; } - bool operator!=(const Operand& rhs) const { return !operator==(rhs); } -}; - -class Label; - -struct Reg8; -struct Reg16; -struct Reg32; -#ifdef XBYAK64 -struct Reg64; -#endif -class Reg : public Operand { - bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); } -public: - Reg() { } - Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } - Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); } - bool isExtIdx() const { return getIdx() > 7; } - uint8 getRex(const Reg& base = Reg()) const - { - return (hasRex() || base.hasRex()) ? uint8(0x40 | ((isREG(64) | base.isREG(64)) ? 8 : 0) | (isExtIdx() ? 4 : 0)| (base.isExtIdx() ? 
1 : 0)) : 0; - } - Reg8 cvt8() const; - Reg16 cvt16() const; - Reg32 cvt32() const; -#ifdef XBYAK64 - Reg64 cvt64() const; -#endif -}; - -struct Reg8 : public Reg { - explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } -}; - -struct Reg16 : public Reg { - explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } -}; - -struct Mmx : public Reg { - explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } -}; - -struct Xmm : public Mmx { - explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } -}; - -struct Ymm : public Xmm { - explicit Ymm(int idx = 0) : Xmm(idx, Operand::YMM, 256) { } -}; - -struct Fpu : public Reg { - explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } -}; - -struct Reg32e : public Reg { - explicit Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} -}; -struct Reg32 : public Reg32e { - explicit Reg32(int idx = 0) : Reg32e(idx, 32) {} -}; -#ifdef XBYAK64 -struct Reg64 : public Reg32e { - explicit Reg64(int idx = 0) : Reg32e(idx, 64) {} -}; -struct RegRip { - sint64 disp_; - Label* label_; - explicit RegRip(sint64 disp = 0, Label* label = 0) : disp_(disp), label_(label) {} - friend const RegRip operator+(const RegRip& r, sint64 disp) { - return RegRip(r.disp_ + disp, r.label_); - } - friend const RegRip operator-(const RegRip& r, sint64 disp) { - return RegRip(r.disp_ - disp, r.label_); - } - friend const RegRip operator+(const RegRip& r, Label& label) { - if (r.label_) throw Error(ERR_BAD_ADDRESSING); - return RegRip(r.disp_, &label); - } -}; -#endif - -inline Reg8 Reg::cvt8() const -{ - const int idx = getIdx(); - if (isBit(8)) return Reg8(idx, isExt8bit()); -#ifdef XBYAK32 - if (idx >= 4) throw Error(ERR_CANT_CONVERT); -#endif - return Reg8(idx, 4 <= idx && idx < 8); -} - -inline Reg16 Reg::cvt16() const -{ - const int idx = getIdx(); - if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); 
- return Reg16(idx); -} - -inline Reg32 Reg::cvt32() const -{ - const int idx = getIdx(); - if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); - return Reg32(idx); -} - -#ifdef XBYAK64 -inline Reg64 Reg::cvt64() const -{ - const int idx = getIdx(); - if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); - return Reg64(idx); -} -#endif - -class RegExp { -public: - struct SReg { - uint16 bit:9; // 32/64/128/256 none if 0 - uint16 idx:7; - SReg() : bit(0), idx(0) { } - void set(const Reg& r) { this->bit = uint16(r.getBit()); this->idx = uint16(r.getIdx()); } - bool operator==(const SReg& rhs) const { return bit == rhs.bit && idx == rhs.idx; } - }; - RegExp(size_t disp = 0) : disp_(disp), scale_(0) { } - RegExp(const Reg& r, int scale = 1) - : disp_(0) - , scale_(scale) - { - if (!r.is(Reg::REG, 32|64) && !r.is(Reg::XMM|Reg::YMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER); - if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE); - if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index - index_.set(r); - } else { - base_.set(r); - } - } - bool isVsib() const { return index_.bit >= 128; } - bool isYMM() const { return index_.bit >= 256; } - RegExp optimize() const // select smaller size - { - // [reg * 2] => [reg + reg] - if (!isVsib() && !base_.bit && index_.bit && scale_ == 2) { - RegExp ret = *this; - ret.base_ = index_; - ret.scale_ = 1; - return ret; - } - return *this; - } - bool operator==(const RegExp& rhs) const - { - return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_; - } - const SReg& getBase() const { return base_; } - const SReg& getIndex() const { return index_; } - int getScale() const { return scale_; } - uint32 getDisp() const { return uint32(disp_); } - void verify() const - { - if (base_.bit >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER); - if (index_.bit && index_.bit <= 64) { - if (index_.idx == Operand::ESP) throw 
Error(ERR_ESP_CANT_BE_INDEX); - if (base_.bit && base_.bit != index_.bit) throw Error(ERR_BAD_SIZE_OF_REGISTER); - } - } -private: - friend RegExp operator+(const RegExp& a, const RegExp& b); - friend RegExp operator-(const RegExp& e, size_t disp); - /* - [base_ + index_ * scale_ + disp_] - base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm - */ - size_t disp_; - int scale_; - SReg base_; - SReg index_; -}; - -inline RegExp operator+(const RegExp& a, const RegExp& b) -{ - if (a.index_.bit && b.index_.bit) throw Error(ERR_BAD_ADDRESSING); - RegExp ret = a; - if (!ret.index_.bit) { ret.index_ = b.index_; ret.scale_ = b.scale_; } - if (b.base_.bit) { - if (ret.base_.bit) { - if (ret.index_.bit) throw Error(ERR_BAD_ADDRESSING); - // base + base => base + index * 1 - ret.index_ = b.base_; - // [reg + esp] => [esp + reg] - if (ret.index_.idx == Operand::ESP) std::swap(ret.base_, ret.index_); - ret.scale_ = 1; - } else { - ret.base_ = b.base_; - } - } - ret.disp_ += b.disp_; - return ret; -} -inline RegExp operator*(const Reg& r, int scale) -{ - return RegExp(r, scale); -} -inline RegExp operator-(const RegExp& e, size_t disp) -{ - RegExp ret = e; - ret.disp_ -= disp; - return ret; -} - -// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) -void *const AutoGrow = (void*)1; - -class CodeArray { - enum Type { - USER_BUF = 1, // use userPtr(non alignment, non protect) - ALLOC_BUF, // use new(alignment, protect) - AUTO_GROW // automatically move and grow memory if necessary - }; - CodeArray(const CodeArray& rhs); - void operator=(const CodeArray&); - bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; } - struct AddrInfo { - size_t codeOffset; // position to write - size_t jmpAddr; // value to write - int jmpSize; // size of jmpAddr - inner::LabelMode mode; - AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) - : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} - uint64 
getVal(const uint8 *top) const - { - uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); - if (jmpSize == 4) disp = inner::VerifyInInt32(disp); - return disp; - } - }; - typedef std::list AddrInfoList; - AddrInfoList addrInfoList_; - const Type type_; -#ifdef XBYAK_USE_MMAP_ALLOCATOR - MmapAllocator defaultAllocator_; -#else - Allocator defaultAllocator_; -#endif - Allocator *alloc_; -protected: - size_t maxSize_; - uint8 *top_; - size_t size_; - - /* - allocate new memory and copy old data to the new area - */ - void growMemory() - { - const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); - uint8 *newTop = alloc_->alloc(newSize); - if (newTop == 0) throw Error(ERR_CANT_ALLOC); - for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; - alloc_->free(top_); - top_ = newTop; - maxSize_ = newSize; - } - /* - calc jmp address for AutoGrow mode - */ - void calcJmpAddress() - { - for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { - uint64 disp = i->getVal(top_); - rewrite(i->codeOffset, disp, i->jmpSize); - } - if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT); - } -public: - explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0) - : type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF) - , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) - , maxSize_(maxSize) - , top_(type_ == USER_BUF ? 
reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) - , size_(0) - { - if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC); - if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) { - alloc_->free(top_); - throw Error(ERR_CANT_PROTECT); - } - } - virtual ~CodeArray() - { - if (isAllocType()) { - if (alloc_->useProtect()) protect(top_, maxSize_, false); - alloc_->free(top_); - } - } - void resetSize() - { - size_ = 0; - addrInfoList_.clear(); - } - void db(int code) - { - if (size_ >= maxSize_) { - if (type_ == AUTO_GROW) { - growMemory(); - } else { - throw Error(ERR_CODE_IS_TOO_BIG); - } - } - top_[size_++] = static_cast(code); - } - void db(const uint8 *code, int codeSize) - { - for (int i = 0; i < codeSize; i++) db(code[i]); - } - void db(uint64 code, int codeSize) - { - if (codeSize > 8) throw Error(ERR_BAD_PARAMETER); - for (int i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); - } - void dw(uint32 code) { db(code, 2); } - void dd(uint32 code) { db(code, 4); } - void dq(uint64 code) { db(code, 8); } - const uint8 *getCode() const { return top_; } - template - const F getCode() const { return CastTo(top_); } - const uint8 *getCurr() const { return &top_[size_]; } - template - const F getCurr() const { return CastTo(&top_[size_]); } - size_t getSize() const { return size_; } - void setSize(size_t size) - { - if (size > maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG); - size_ = size; - } - void dump() const - { - const uint8 *p = getCode(); - size_t bufSize = getSize(); - size_t remain = bufSize; - for (int i = 0; i < 4; i++) { - size_t disp = 16; - if (remain < 16) { - disp = remain; - } - for (size_t j = 0; j < 16; j++) { - if (j < disp) { - printf("%02X", p[i * 16 + j]); - } - } - putchar('\n'); - remain -= disp; - if (remain <= 0) { - break; - } - } - } - /* - @param offset [in] offset from top - @param disp [in] offset from the next of jmp - @param size [in] write size(1, 2, 4, 8) - */ - void 
rewrite(size_t offset, uint64 disp, size_t size) - { - assert(offset < maxSize_); - if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER); - uint8 *const data = top_ + offset; - for (size_t i = 0; i < size; i++) { - data[i] = static_cast(disp >> (i * 8)); - } - } - void save(size_t offset, size_t val, int size, inner::LabelMode mode) - { - addrInfoList_.push_back(AddrInfo(offset, val, size, mode)); - } - bool isAutoGrow() const { return type_ == AUTO_GROW; } - /** - change exec permission of memory - @param addr [in] buffer address - @param size [in] buffer size - @param canExec [in] true(enable to exec), false(disable to exec) - @return true(success), false(failure) - */ - static inline bool protect(const void *addr, size_t size, bool canExec) - { -#if defined(_WIN32) - DWORD oldProtect; - return VirtualProtect(const_cast(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0; -#elif defined(__GNUC__) - size_t pageSize = sysconf(_SC_PAGESIZE); - size_t iaddr = reinterpret_cast(addr); - size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); - int mode = PROT_READ | PROT_WRITE | (canExec ? 
PROT_EXEC : 0); - return mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode) == 0; -#else - return true; -#endif - } - /** - get aligned memory pointer - @param addr [in] address - @param alingedSize [in] power of two - @return aligned addr by alingedSize - */ - static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16) - { - return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); - } -}; - -class Address : public Operand { - mutable uint8 top_[6]; // 6 = 1(ModRM) + 1(SIB) + 4(disp) - uint8 size_; - uint8 rex_; - size_t disp_; - const Label* label_; - bool isOnlyDisp_; - bool is64bitDisp_; - bool is32bit_; - mutable bool isVsib_; - bool isYMM_; - void verify() const { if (isVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); } -public: - Address(uint32 sizeBit, bool isOnlyDisp, size_t disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false) - : Operand(0, MEM, sizeBit) - , size_(0) - , rex_(0) - , disp_(disp) - , label_(0) - , isOnlyDisp_(isOnlyDisp) - , is64bitDisp_(is64bitDisp) - , is32bit_(is32bit) - , isVsib_(isVsib) - , isYMM_(isYMM) - { - } - void db(int code) - { - if (size_ >= sizeof(top_)) throw Error(ERR_CODE_IS_TOO_BIG); - top_[size_++] = static_cast(code); - } - void dd(uint32 code) { for (int i = 0; i < 4; i++) db(code >> (i * 8)); } - const uint8 *getCode() const { return top_; } - size_t getSize() const { return size_; } - void updateRegField(uint8 regIdx) const - { - *top_ = (*top_ & B11000111) | ((regIdx << 3) & B00111000); - } - void setVsib(bool isVsib) const { isVsib_ = isVsib; } - bool isVsib() const { return isVsib_; } - bool isYMM() const { return isYMM_; } - bool is32bit() const { verify(); return is32bit_; } - bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax - size_t getDisp() const { verify(); return disp_; } - uint8 getRex() const { verify(); return rex_; } - bool is64bitDisp() const { verify(); return 
is64bitDisp_; } // for moffset - void setRex(uint8 rex) { rex_ = rex; } - void setLabel(const Label* label) { label_ = label; } - const Label* getLabel() const { return label_; } -}; - -class AddressFrame { -private: - void operator=(const AddressFrame&); - Address makeAddress(const RegExp& e) const - { - e.verify(); - const bool isVsib = e.isVsib(); - const bool isYMM = e.isYMM(); - const RegExp::SReg& base = e.getBase(); - const RegExp::SReg& index = e.getIndex(); - const uint32 disp = e.getDisp(); - Address frame(bit_, (!base.bit && !index.bit), disp, base.bit == 32 || index.bit == 32, false, isVsib, isYMM); - enum { - mod00 = 0, mod01 = 1, mod10 = 2 - }; - int mod; - if (!base.bit || ((base.idx & 7) != Operand::EBP && disp == 0)) { - mod = mod00; - } else if (inner::IsInDisp8(disp)) { - mod = mod01; - } else { - mod = mod10; - } - const int baseIdx = base.bit ? (base.idx & 7) : Operand::EBP; - /* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */ - bool hasSIB = index.bit || (base.idx & 7) == Operand::ESP; -#ifdef XBYAK64 - if (!base.bit && !index.bit) hasSIB = true; -#endif - if (hasSIB) { - frame.db((mod << 6) | Operand::ESP); - /* SIB = [2:3:3] = [SS:index:base(=rm)] */ - const int indexIdx = index.bit ? (index.idx & 7) : Operand::ESP; - const int scale = e.getScale(); - const int ss = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 
1 : 0; - frame.db((ss << 6) | (indexIdx << 3) | baseIdx); - } else { - frame.db((mod << 6) | baseIdx); - } - if (mod == mod01) { - frame.db(disp); - } else if (mod == mod10 || (mod == mod00 && !base.bit)) { - frame.dd(disp); - } - int rex = ((index.idx >> 3) << 1) | (base.idx >> 3); - if (rex) rex |= 0x40; - frame.setRex(uint8(rex)); - return frame; - } -public: - const uint32 bit_; - explicit AddressFrame(uint32 bit) : bit_(bit) { } - Address operator[](const void *disp) const - { - size_t adr = reinterpret_cast(disp); -#ifdef XBYAK64 - if (adr > 0xFFFFFFFFU) throw Error(ERR_OFFSET_IS_TOO_BIG); -#endif - RegExp e(static_cast(adr)); - return operator[](e); - } -#ifdef XBYAK64 - Address operator[](uint64 disp) const - { - return Address(64, true, disp, false, true); - } - Address operator[](const RegRip& addr) const - { - Address frame(bit_, true, addr.disp_, false); - frame.db(0x05); - if (addr.label_) { - frame.setLabel(addr.label_); - } else { - frame.dd(inner::VerifyInInt32(addr.disp_)); - } - return frame; - } -#endif - Address operator[](const RegExp& e) const - { - return makeAddress(e.optimize()); - } -}; - -struct JmpLabel { - size_t endOfJmp; /* offset from top to the end address of jmp */ - int jmpSize; - inner::LabelMode mode; - size_t disp; // disp for [rip + disp] - explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0) - : endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp) - { - } -}; - -class LabelManager; - -class Label { - mutable LabelManager *mgr; - mutable int id; - friend class LabelManager; -public: - Label() : mgr(0), id(0) {} - Label(const Label& rhs); - Label& operator=(const Label& rhs); - ~Label(); - int getId() const { return id; } - - // backward compatibility - static std::string toStr(int num) - { - char buf[16]; -#ifdef _MSC_VER - _snprintf_s -#else - snprintf -#endif - (buf, sizeof(buf), ".%08x", num); - return buf; - } -}; - -class LabelManager { - // for string 
label - struct SlabelVal { - size_t offset; - SlabelVal(size_t offset) : offset(offset) {} - }; - typedef XBYAK_STD_UNORDERED_MAP SlabelDefList; - typedef XBYAK_STD_UNORDERED_MULTIMAP SlabelUndefList; - struct SlabelState { - SlabelDefList defList; - SlabelUndefList undefList; - }; - typedef std::list StateList; - // for Label class - struct ClabelVal { - ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {} - size_t offset; - int refCount; - }; - typedef XBYAK_STD_UNORDERED_MAP ClabelDefList; - typedef XBYAK_STD_UNORDERED_MULTIMAP ClabelUndefList; - - CodeArray *base_; - // global : stateList_.front(), local : stateList_.back() - StateList stateList_; - mutable int labelId_; - ClabelDefList clabelDefList_; - ClabelUndefList clabelUndefList_; - - int getId(const Label& label) const - { - if (label.id == 0) label.id = labelId_++; - return label.id; - } - template - void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset) - { - // add label - typename DefList::value_type item(labelId, addrOffset); - std::pair ret = defList.insert(item); - if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED); - // search undefined label - for (;;) { - typename UndefList::iterator itr = undefList.find(labelId); - if (itr == undefList.end()) break; - const JmpLabel *jmp = &itr->second; - const size_t offset = jmp->endOfJmp - jmp->jmpSize; - size_t disp; - if (jmp->mode == inner::LaddTop) { - disp = addrOffset; - } else if (jmp->mode == inner::Labs) { - disp = size_t(base_->getCurr()); - } else { - disp = addrOffset - jmp->endOfJmp + jmp->disp; -#ifdef XBYAK64 - if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG); -#endif - if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR); - } - if (base_->isAutoGrow()) { - base_->save(offset, disp, jmp->jmpSize, jmp->mode); - } else { - base_->rewrite(offset, disp, jmp->jmpSize); - } - undefList.erase(itr); - } - } - template - bool 
getOffset_inner(const DefList& defList, size_t *offset, const T& label) const - { - typename DefList::const_iterator i = defList.find(label); - if (i == defList.end()) return false; - *offset = i->second.offset; - return true; - } - friend class Label; - void incRefCount(int id) { clabelDefList_[id].refCount++; } - void decRefCount(int id) - { - ClabelDefList::iterator i = clabelDefList_.find(id); - if (i == clabelDefList_.end()) return; - if (i->second.refCount == 1) { - clabelDefList_.erase(id); - } else { - --i->second.refCount; - } - } - template - bool hasUndefinedLabel_inner(const T& list) const - { -#ifndef NDEBUG - for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) { - std::cerr << "undefined label:" << i->first << std::endl; - } -#endif - return !list.empty(); - } -public: - LabelManager() - { - reset(); - } - void reset() - { - base_ = 0; - labelId_ = 1; - stateList_.clear(); - stateList_.push_back(SlabelState()); - stateList_.push_back(SlabelState()); - } - void enterLocal() - { - stateList_.push_back(SlabelState()); - } - void leaveLocal() - { - if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL); - if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND); - stateList_.pop_back(); - } - void set(CodeArray *base) { base_ = base; } - void defineSlabel(std::string label) - { - if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR); - if (label == "@@") { - SlabelDefList& defList = stateList_.front().defList; - SlabelDefList::iterator i = defList.find("@f"); - if (i != defList.end()) { - defList.erase(i); - label = "@b"; - } else { - i = defList.find("@b"); - if (i != defList.end()) { - defList.erase(i); - } - label = "@f"; - } - } - SlabelState& st = *label.c_str() == '.' ? 
stateList_.back() : stateList_.front(); - define_inner(st.defList, st.undefList, label, base_->getSize()); - } - void defineClabel(const Label& label) - { - define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); - label.mgr = this; - } - void assign(Label& dst, const Label& src) - { - ClabelDefList::const_iterator i = clabelDefList_.find(src.id); - if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); - define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); - dst.mgr = this; - } - bool getOffset(size_t *offset, std::string& label) const - { - const SlabelDefList& defList = stateList_.front().defList; - if (label == "@b") { - if (defList.find("@f") != defList.end()) { - label = "@f"; - } else if (defList.find("@b") == defList.end()) { - throw Error(ERR_LABEL_IS_NOT_FOUND); - } - } else if (label == "@f") { - if (defList.find("@f") != defList.end()) { - label = "@b"; - } - } - const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); - return getOffset_inner(st.defList, offset, label); - } - bool getOffset(size_t *offset, const Label& label) const - { - return getOffset_inner(clabelDefList_, offset, getId(label)); - } - void addUndefinedLabel(const std::string& label, const JmpLabel& jmp) - { - SlabelState& st = *label.c_str() == '.' ? 
stateList_.back() : stateList_.front(); - st.undefList.insert(SlabelUndefList::value_type(label, jmp)); - } - void addUndefinedLabel(const Label& label, const JmpLabel& jmp) - { - clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp)); - } - bool hasUndefSlabel() const - { - for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) { - if (hasUndefinedLabel_inner(i->undefList)) return true; - } - return false; - } - bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } -}; - -inline Label::Label(const Label& rhs) -{ - id = rhs.id; - mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); -} -inline Label& Label::operator=(const Label& rhs) -{ - if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); - id = rhs.id; - mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); - return *this; -} -inline Label::~Label() -{ - if (id && mgr) mgr->decRefCount(id); -} - -class CodeGenerator : public CodeArray { -public: - enum LabelType { - T_SHORT, - T_NEAR, - T_AUTO // T_SHORT if possible - }; -private: - CodeGenerator operator=(const CodeGenerator&); // don't call -#ifdef XBYAK64 - enum { i32e = 32 | 64, BIT = 64 }; - static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788; - typedef Reg64 NativeReg; -#else - enum { i32e = 32, BIT = 32 }; - static const size_t dummyAddr = 0x12345678; - typedef Reg32 NativeReg; -#endif - // (XMM, XMM|MEM) - static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2) - { - return op1.isXMM() && (op2.isXMM() || op2.isMEM()); - } - // (MMX, MMX|MEM) or (XMM, XMM|MEM) - static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2) - { - return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2); - } - // (XMM, MMX|MEM) - static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2) - { - return op1.isXMM() && (op2.isMMX() || op2.isMEM()); - } - // (MMX, XMM|MEM) - static inline bool isMMX_XMMorMEM(const Operand& op1, 
const Operand& op2) - { - return op1.isMMX() && (op2.isXMM() || op2.isMEM()); - } - // (XMM, REG32|MEM) - static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2) - { - return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM()); - } - // (REG32, XMM|MEM) - static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2) - { - return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM()); - } - // (REG32, REG32|MEM) - static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2) - { - return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM()); - } - void rex(const Operand& op1, const Operand& op2 = Operand()) - { - uint8 rex = 0; - const Operand *p1 = &op1, *p2 = &op2; - if (p1->isMEM()) std::swap(p1, p2); - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); - if (p2->isMEM()) { - const Address& addr = static_cast(*p2); - if (BIT == 64 && addr.is32bit()) db(0x67); - rex = addr.getRex() | static_cast(*p1).getRex(); - } else { - // ModRM(reg, base); - rex = static_cast(op2).getRex(static_cast(op1)); - } - // except movsx(16bit, 32/64bit) - if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66); - if (rex) db(rex); - } - enum AVXtype { - PP_NONE = 1 << 0, - PP_66 = 1 << 1, - PP_F3 = 1 << 2, - PP_F2 = 1 << 3, - MM_RESERVED = 1 << 4, - MM_0F = 1 << 5, - MM_0F38 = 1 << 6, - MM_0F3A = 1 << 7 - }; - void vex(bool r, int idx, bool is256, int type, bool x = false, bool b = false, int w = 1) - { - uint32 pp = (type & PP_66) ? 1 : (type & PP_F3) ? 2 : (type & PP_F2) ? 3 : 0; - uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; - if (!b && !x && !w && (type & MM_0F)) { - db(0xC5); db((r ? 0 : 0x80) | vvvv); - } else { - uint32 mmmm = (type & MM_0F) ? 1 : (type & MM_0F38) ? 2 : (type & MM_0F3A) ? 3 : 0; - db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 
0 : 0x20) | mmmm); db((w << 7) | vvvv); - } - } - LabelManager labelMgr_; - bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } - uint8 getModRM(int mod, int r1, int r2) const { return static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)); } - void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE) - { - rex(reg2, reg1); - db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); - db(getModRM(3, reg1.getIdx(), reg2.getIdx())); - } - void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE) - { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - rex(addr, reg); - db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); - addr.updateRegField(static_cast(reg.getIdx())); - opAddr(addr); - } - void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) - { - const int shortJmpSize = 2; - const int longHeaderSize = longPref ? 
2 : 1; - const int longJmpSize = longHeaderSize + 4; - if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { - db(shortCode); db(disp - shortJmpSize); - } else { - if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR); - if (longPref) db(longPref); - db(longCode); dd(disp - longJmpSize); - } - } - template - void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) - { - if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ - size_t offset = 0; - if (labelMgr_.getOffset(&offset, label)) { /* label exists */ - makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); - } else { - int jmpSize = 0; - if (type == T_NEAR) { - jmpSize = 4; - if (longPref) db(longPref); - db(longCode); dd(0); - } else { - jmpSize = 1; - db(shortCode); db(0); - } - JmpLabel jmp(size_, jmpSize, inner::LasIs); - labelMgr_.addUndefinedLabel(label, jmp); - } - } - void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode) - { - if (isAutoGrow()) { - if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); - if (size_ + 16 >= maxSize_) growMemory(); - db(longCode); - dd(0); - save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); - } else { - makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, 0); - } - - } - void opAddr(const Address &addr) - { - db(addr.getCode(), static_cast(addr.getSize())); - if (addr.getLabel()) { // [rip + Label] - putL_inner(*addr.getLabel(), true, addr.getDisp()); - } - } - /* preCode is for SSSE3/SSE4 */ - void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) - { - if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION); - if (pref != NONE) db(pref); - if (op.isMEM()) { - opModM(static_cast(op), static_cast(reg), 0x0F, preCode, code); - } else { - opModR(static_cast(reg), 
static_cast(op), 0x0F, preCode, code); - } - if (imm8 != NONE) db(imm8); - } - void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext) - { - if (mmx.isXMM()) db(0x66); - opModR(Reg32(ext), mmx, 0x0F, code); - db(imm8); - } - void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE) - { - opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode); - } - void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref) - { - if (pref != NONE) db(pref); - if (op1.isXMM() && op2.isMEM()) { - opModM(static_cast(op2), static_cast(op1), 0x0F, code); - } else if (op1.isMEM() && op2.isXMM()) { - opModM(static_cast(op1), static_cast(op2), 0x0F, code | 1); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } - void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) - { - if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */ - if (mmx.isXMM()) db(0x66); - opModR(static_cast(op), mmx, 0x0F, B11000101); db(imm); - } else { - opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, B00111010); - } - } - void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false) - { - int opBit = op.getBit(); - if (disableRex && opBit == 64) opBit = 32; - if (op.isREG(bit)) { - opModR(Reg(ext, Operand::REG, opBit), static_cast(op).changeBit(opBit), code0, code1, code2); - } else if (op.isMEM()) { - opModM(static_cast(op), Reg(ext, Operand::REG, opBit), code0, code1, code2); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } - void opShift(const Operand& op, int imm, int ext) - { - verifyMemHasSize(op); - opR_ModM(op, 0, ext, (B11000000 | ((imm == 1 ? 
1 : 0) << 4))); - if (imm != 1) db(imm); - } - void opShift(const Operand& op, const Reg8& cl, int ext) - { - if (cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); - opR_ModM(op, 0, ext, B11010010); - } - void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE) - { - if (condR) { - opModR(static_cast(op1), static_cast(op2), code0, code1, code2); - } else if (condM) { - opModM(static_cast(op2), static_cast(op1), code0, code1, code2); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } - void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *cl = 0) - { - if (cl && cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); - opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (cl ? 1 : 0)); - if (!cl) db(imm); - } - // (REG, REG|MEM), (MEM, REG) - void opRM_RM(const Operand& op1, const Operand& op2, int code) - { - if (op1.isREG() && op2.isMEM()) { - opModM(static_cast(op2), static_cast(op1), code | 2); - } else { - opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code); - } - } - // (REG|MEM, IMM) - void opRM_I(const Operand& op, uint32 imm, int code, int ext) - { - verifyMemHasSize(op); - uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; - if (op.isBit(8)) immBit = 8; - if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG); - if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ - if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al - rex(op); - db(code | 4 | (immBit == 8 ? 0 : 1)); - } else { - int tmp = immBit < (std::min)(op.getBit(), 32U) ? 
2 : 0; - opR_ModM(op, 0, ext, B10000000 | tmp); - } - db(imm, immBit / 8); - } - void opIncDec(const Operand& op, int code, int ext) - { - verifyMemHasSize(op); -#ifndef XBYAK64 - if (op.isREG() && !op.isBit(8)) { - rex(op); db(code | op.getIdx()); - return; - } -#endif - code = B11111110; - if (op.isREG()) { - opModR(Reg(ext, Operand::REG, op.getBit()), static_cast(op), code); - } else { - opModM(static_cast(op), Reg(ext, Operand::REG, op.getBit()), code); - } - } - void opPushPop(const Operand& op, int code, int ext, int alt) - { - if (op.isREG()) { - if (op.isBit(16)) db(0x66); - if (static_cast(op).getIdx() >= 8) db(0x41); - db(alt | (op.getIdx() & 7)); - } else if (op.isMEM()) { - opModM(static_cast(op), Reg(ext, Operand::REG, op.getBit()), code); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } - void verifyMemHasSize(const Operand& op) const - { - if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED); - } - void opMovxx(const Reg& reg, const Operand& op, uint8 code) - { - if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION); - int w = op.isBit(16); -#ifdef XBYAK64 - if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION); -#endif - bool cond = reg.isREG() && (reg.getBit() > op.getBit()); - opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); - } - void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext) - { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; - if (!code) throw Error(ERR_BAD_MEM_SIZE); - if (m64ext && addr.isBit(64)) ext = m64ext; - - rex(addr, st0); - db(code); - addr.updateRegField(ext); - opAddr(addr); - } - // use code1 if reg1 == st0 - // use code2 if reg1 != st0 && reg2 == st0 - void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2) - { - uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? 
code2 : 0; - if (!code) throw Error(ERR_BAD_ST_COMBINATION); - db(uint8(code >> 8)); - db(uint8(code | (reg1.getIdx() | reg2.getIdx()))); - } - void opFpu(const Fpu& reg, uint8 code1, uint8 code2) - { - db(code1); db(code2 | reg.getIdx()); - } - void opVex(const Reg& r, const Operand *p1, const Operand *p2, int type, int code, int w) - { - bool x, b; - if (p2->isMEM()) { - const Address& addr = static_cast(*p2); - uint8 rex = addr.getRex(); - x = (rex & 2) != 0; - b = (rex & 1) != 0; - if (BIT == 64 && addr.is32bit()) db(0x67); - if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; - } else { - x = false; - b = static_cast(*p2).isExtIdx(); - } - if (w == -1) w = 0; - vex(r.isExtIdx(), p1 ? p1->getIdx() : 0, r.isYMM(), type, x, b, w); - db(code); - if (p2->isMEM()) { - const Address& addr = static_cast(*p2); - addr.updateRegField(static_cast(r.getIdx())); - opAddr(addr); - } else { - db(getModRM(3, r.getIdx(), p2->getIdx())); - } - } - // (r, r, r/m) if isR_R_RM - // (r, r/m, r) - void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM) - { - const Operand *p1 = &op1; - const Operand *p2 = &op2; - if (!isR_R_RM) std::swap(p1, p2); - const unsigned int bit = r.getBit(); - if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION); - int w = bit == 64; - opVex(r, p1, p2, type, code, w); - } - void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) - { - const Xmm *x2; - const Operand *op; - if (op2.isNone()) { - x2 = &x1; - op = &op1; - } else { - if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw Error(ERR_BAD_COMBINATION); - x2 = static_cast(&op1); - op = &op2; - } - // (x1, x2, op) - if (!((x1.isXMM() && x2->isXMM()) || (supportYMM && x1.isYMM() && x2->isYMM()))) throw Error(ERR_BAD_COMBINATION); - opVex(x1, x2, op, type, code0, w); - } - // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return 
op - void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, bool supportYMM, int w = -1) - { - // use static_cast to avoid calling unintentional copy constructor on gcc - opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast(Xmm(op2.getIdx())) : static_cast(Ymm(op2.getIdx())) : op2, type, code0, supportYMM, w); - } - // support (x, x/m, imm), (y, y/m, imm) - void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) - { - opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); - } - // QQQ:need to refactor - void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1) - { - if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); - bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); - if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); - if (is16bit) db(0x66); - db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1); - } - void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w, int mode) - { - if (!addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING); - const int y_vx_y = 0; - const int y_vy_y = 1; -// const int x_vy_x = 2; - const bool isAddrYMM = addr.isYMM(); - if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { - bool isOK = false; - if (mode == y_vx_y) { - isOK = x1.isYMM() && !isAddrYMM && x2.isYMM(); - } else if (mode == y_vy_y) { - isOK = x1.isYMM() && isAddrYMM && x2.isYMM(); - } else { // x_vy_x - isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); - } - if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING); - } - addr.setVsib(false); - opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? 
Ymm(x2.getIdx()) : x2, addr, type, code, true, w); - addr.setVsib(true); - } -public: - unsigned int getVersion() const { return VERSION; } - using CodeArray::db; - const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; - const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; - const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; - const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; - const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; - const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; - const Reg16 ax, cx, dx, bx, sp, bp, si, di; - const Reg8 al, cl, dl, bl, ah, ch, dh, bh; - const AddressFrame ptr, byte, word, dword, qword; - const Fpu st0, st1, st2, st3, st4, st5, st6, st7; -#ifdef XBYAK64 - const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; - const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; - const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w; - const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b; - const Reg8 spl, bpl, sil, dil; - const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15; - const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15; - const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience - const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; - const RegRip rip; -#endif - void L(const std::string& label) { labelMgr_.defineSlabel(label); } - void L(const Label& label) { labelMgr_.defineClabel(label); } - /* - assign src to dst - require - dst : does not used by L() - src : used by L() - */ - void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); } - void inLocalLabel() { labelMgr_.enterLocal(); } - void outLocalLabel() { labelMgr_.leaveLocal(); } - void jmp(std::string label, LabelType type = T_AUTO) - { - opJmp(label, type, B11101011, B11101001, 0); - } - void jmp(const Label& label, LabelType type = T_AUTO) - { - opJmp(label, type, B11101011, B11101001, 0); - } - void jmp(const char *label, LabelType 
type = T_AUTO) { jmp(std::string(label), type); } - void jmp(const void *addr, LabelType type = T_AUTO) - { - opJmpAbs(addr, type, B11101011, B11101001); - } - void jmp(const Operand& op) - { - opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); - } - void call(const Operand& op) - { - opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); - } - // (REG|MEM, REG) - void test(const Operand& op, const Reg& reg) - { - opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), B10000100); - } - // (REG|MEM, IMM) - void test(const Operand& op, uint32 imm) - { - verifyMemHasSize(op); - if (op.isREG() && op.getIdx() == 0) { // al, ax, eax - rex(op); - db(B10101000 | (op.isBit(8) ? 0 : 1)); - } else { - opR_ModM(op, 0, 0, B11110110); - } - db(imm, (std::min)(op.getBit() / 8, 4U)); - } - void ret(int imm = 0) - { - if (imm) { - db(B11000010); dw(imm); - } else { - db(B11000011); - } - } - // (REG16|REG32, REG16|REG32|MEM) - void imul(const Reg& reg, const Operand& op) - { - opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, B10101111); - } - void imul(const Reg& reg, const Operand& op, int imm) - { - int s = inner::IsInDisp8(imm) ? 1 : 0; - opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), B01101001 | (s << 1)); - int size = s ? 1 : reg.isREG(16) ? 
2 : 4; - db(imm, size); - } - void pop(const Operand& op) - { - opPushPop(op, B10001111, 0, B01011000); - } - void push(const Operand& op) - { - opPushPop(op, B11111111, 6, B01010000); - } - void push(const AddressFrame& af, uint32 imm) - { - if (af.bit_ == 8 && inner::IsInDisp8(imm)) { - db(B01101010); db(imm); - } else if (af.bit_ == 16 && isInDisp16(imm)) { - db(0x66); db(B01101000); dw(imm); - } else { - db(B01101000); dd(imm); - } - } - /* use "push(word, 4)" if you want "push word 4" */ - void push(uint32 imm) - { - if (inner::IsInDisp8(imm)) { - push(byte, imm); - } else { - push(dword, imm); - } - } - void bswap(const Reg32e& reg) - { - opModR(Reg32(1), reg, 0x0F); - } - void mov(const Operand& reg1, const Operand& reg2) - { - const Reg *reg = 0; - const Address *addr = 0; - uint8 code = 0; - if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp] - reg = &static_cast(reg1); - addr= &static_cast(reg2); - code = B10100000; - } else - if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al - reg = &static_cast(reg2); - addr= &static_cast(reg1); - code = B10100010; - } -#ifdef XBYAK64 - if (addr && addr->is64bitDisp()) { - if (code) { - rex(*reg); - db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3); - db(addr->getDisp(), 8); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } else -#else - if (code && addr->isOnlyDisp()) { - rex(*reg, *addr); - db(code | (reg->isBit(8) ? 0 : 1)); - dd(static_cast(addr->getDisp())); - } else -#endif - { - opRM_RM(reg1, reg2, B10001000); - } - } -private: - /* - mov(r, imm) = db(imm, mov_imm(r, imm)) - */ - int mov_imm(const Reg& reg, size_t imm) - { - int bit = reg.getBit(); - const int idx = reg.getIdx(); - int code = B10110000 | ((bit == 8 ? 
0 : 1) << 3); - if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) { - rex(Reg32(idx)); - bit = 32; - } else { - rex(reg); - if (bit == 64 && inner::IsInInt32(imm)) { - db(B11000111); - code = B11000000; - bit = 32; - } - } - db(code | (idx & 7)); - return bit / 8; - } - template - void putL_inner(T& label, bool relative = false, size_t disp = 0) - { - const int jmpSize = relative ? 4 : (int)sizeof(size_t); - if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); - size_t offset = 0; - if (labelMgr_.getOffset(&offset, label)) { - if (relative) { - db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize); - } else if (isAutoGrow()) { - db(uint64(0), jmpSize); - save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); - } else { - db(size_t(top_) + offset, jmpSize); - } - return; - } - db(uint64(0), jmpSize); - JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp); - labelMgr_.addUndefinedLabel(label, jmp); - } -public: - void mov(const Operand& op, size_t imm) - { - verifyMemHasSize(op); - if (op.isREG()) { - const int size = mov_imm(static_cast(op), imm); - db(imm, size); - } else if (op.isMEM()) { - opModM(static_cast(op), Reg(0, Operand::REG, op.getBit()), B11000110); - int size = op.getBit() / 8; if (size > 4) size = 4; - db(static_cast(imm), size); - } else { - throw Error(ERR_BAD_COMBINATION); - } - } - void mov(const NativeReg& reg, const char *label) // can't use std::string - { - if (label == 0) { - mov(static_cast(reg), 0); // call imm - return; - } - mov_imm(reg, dummyAddr); - putL(label); - } - void mov(const NativeReg& reg, const Label& label) - { - mov_imm(reg, dummyAddr); - putL(label); - } - void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); } - void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); } - /* - put address of label to buffer - @note the put size is 4(32-bit), 8(64-bit) - */ - void putL(std::string label) 
{ putL_inner(label); } - void putL(const Label& label) { putL_inner(label); } - void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } - void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); } - void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); } -#ifdef XBYAK64 - void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, B11000111); } -#endif - void xadd(const Operand& op, const Reg& reg) - { - opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, B11000000 | (reg.isBit(8) ? 0 : 1)); - } - void cmpxchg(const Operand& op, const Reg& reg) - { - opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xb0 | (reg.isBit(8) ? 0 : 1)); - } - void xchg(const Operand& op1, const Operand& op2) - { - const Operand *p1 = &op1, *p2 = &op2; - if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { - p1 = &op2; p2 = &op1; - } - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); - if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) -#ifdef XBYAK64 - && (p2->getIdx() != 0 || !p1->isREG(32)) -#endif - ) { - rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7)); - return; - } - opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), B10000110 | (p1->isBit(8) ? 
0 : 1)); - } - void call(std::string label) { opJmp(label, T_NEAR, 0, B11101000, 0); } - // call(string label) - void call(const char *label) { call(std::string(label)); } - void call(const Label& label) { opJmp(label, T_NEAR, 0, B11101000, 0); } - // call(function pointer) -#ifdef XBYAK_VARIADIC_TEMPLATE - template - void call(Ret(*func)(Params...)) { call(CastTo(func)); } -#endif - void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, B11101000); } - // special case - void movd(const Address& addr, const Mmx& mmx) - { - if (mmx.isXMM()) db(0x66); - opModM(addr, mmx, 0x0F, B01111110); - } - void movd(const Reg32& reg, const Mmx& mmx) - { - if (mmx.isXMM()) db(0x66); - opModR(mmx, reg, 0x0F, B01111110); - } - void movd(const Mmx& mmx, const Address& addr) - { - if (mmx.isXMM()) db(0x66); - opModM(addr, mmx, 0x0F, B01101110); - } - void movd(const Mmx& mmx, const Reg32& reg) - { - if (mmx.isXMM()) db(0x66); - opModR(mmx, reg, 0x0F, B01101110); - } - void movq2dq(const Xmm& xmm, const Mmx& mmx) - { - db(0xF3); opModR(xmm, mmx, 0x0F, B11010110); - } - void movdq2q(const Mmx& mmx, const Xmm& xmm) - { - db(0xF2); opModR(mmx, xmm, 0x0F, B11010110); - } - void movq(const Mmx& mmx, const Operand& op) - { - if (mmx.isXMM()) db(0xF3); - opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? B01111110 : B01101111); - } - void movq(const Address& addr, const Mmx& mmx) - { - if (mmx.isXMM()) db(0x66); - opModM(addr, mmx, 0x0F, mmx.isXMM() ? 
B11010110 : B01111111); - } -#ifdef XBYAK64 - void movq(const Reg64& reg, const Mmx& mmx) - { - if (mmx.isXMM()) db(0x66); - opModR(mmx, reg, 0x0F, B01111110); - } - void movq(const Mmx& mmx, const Reg64& reg) - { - if (mmx.isXMM()) db(0x66); - opModR(mmx, reg, 0x0F, B01101110); - } - void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) - { - if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); - opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, B00111010); // force to 64bit - } - void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) - { - if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); - opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, B00111010); // force to 64bit - } - void movsxd(const Reg64& reg, const Operand& op) - { - if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); - opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); - } -#endif - // MMX2 : pextrw : reg, mmx/xmm, imm - // SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm - void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); } - void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); } - void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); } - void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); } - void pinsrw(const Mmx& mmx, const Operand& op, int imm) - { - if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); - opGen(mmx, op, B11000100, mmx.isXMM() ? 
0x66 : NONE, 0, imm); - } - void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, B00111010); } - void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, B00111010); } - void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, B00111010); } - - void pmovmskb(const Reg32e& reg, const Mmx& mmx) - { - if (mmx.isXMM()) db(0x66); - opModR(reg, mmx, 0x0F, B11010111); - } - void maskmovq(const Mmx& reg1, const Mmx& reg2) - { - if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); - opModR(reg1, reg2, 0x0F, B11110111); - } - void lea(const Reg32e& reg, const Address& addr) { opModM(addr, reg, B10001101); } - - void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, B01010000); } - void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); } - void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, B00101011); } - void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); } - void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, B11110000); } - void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, B11000011); } - void movntq(const Address& addr, const Mmx& mmx) - { - if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); - opModM(addr, mmx, 0x0F, B11100111); - } - void crc32(const Reg32e& reg, const Operand& op) - { - if (reg.isBit(32) && op.isBit(16)) db(0x66); - db(0xF2); - opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 
0 : 1)); - } - void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0f, 0xc7); } - void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0f, 0xc7); } - void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), MM_0F3A | PP_F2, 0xF0, false); db(imm); } - enum { NONE = 256 }; - CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) - : CodeArray(maxSize, userPtr, allocator) - , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) - , xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7) - , ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7) - , xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience - , ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) // for my convenience - , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) - , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) - , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) - , ptr(0), byte(8), word(16), dword(32), qword(64) - , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) -#ifdef XBYAK64 - , rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15) - , r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), 
r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D) - , r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W) - , r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B) - , spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true) - , xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15) - , ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15) - , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience - , ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) // for my convenience - , rip() -#endif - { - labelMgr_.set(this); - } - void reset() - { - resetSize(); - labelMgr_.reset(); - labelMgr_.set(this); - } - bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); } - /* - call ready() to complete generating code on AutoGrow - */ - void ready() - { - if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND); - calcJmpAddress(); - } -#ifdef XBYAK_TEST - void dump(bool doClear = true) - { - CodeArray::dump(); - if (doClear) size_ = 0; - } -#endif - -#ifndef XBYAK_DONT_READ_LIST -#include "xbyak_mnemonic.h" - void align(int x = 16) - { - if (x == 1) return; - if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN); - if (isAutoGrow() && x > (int)inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", x); - while (size_t(getCurr()) % x) { - nop(); - } - } -#endif -}; - -namespace util { -static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); -static const Xmm xmm0(0), xmm1(1), xmm2(2), 
xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); -static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); -static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); -static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); -static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); -static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64); -static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); -#ifdef XBYAK64 -static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); -static const Reg32 r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D); -static const Reg16 r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W); -static const Reg8 r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B), spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1); -static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); -static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); -static const RegRip rip; -#endif -} // util 
- -#ifdef _MSC_VER - #pragma warning(pop) -#endif - -} // end of namespace - -#endif // XBYAK_XBYAK_H_ diff --git a/plugins/GSdx_legacy/xbyak/xbyak_bin2hex.h b/plugins/GSdx_legacy/xbyak/xbyak_bin2hex.h deleted file mode 100644 index 1eb447f4db..0000000000 --- a/plugins/GSdx_legacy/xbyak/xbyak_bin2hex.h +++ /dev/null @@ -1,286 +0,0 @@ -/* Copyright (c) 2007 MITSUNARI Shigeo -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* -* Redistributions of source code must retain the above copyright notice, this -* list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, -* this list of conditions and the following disclaimer in the documentation -* and/or other materials provided with the distribution. -* Neither the name of the copyright owner nor the names of its contributors may -* be used to endorse or promote products derived from this software without -* specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -* THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -enum { - B00000000= 0, - B00000001= 1, - B00000010= 2, - B00000011= 3, - B00000100= 4, - B00000101= 5, - B00000110= 6, - B00000111= 7, - B00001000= 8, - B00001001= 9, - B00001010= 10, - B00001011= 11, - B00001100= 12, - B00001101= 13, - B00001110= 14, - B00001111= 15, - B00010000= 16, - B00010001= 17, - B00010010= 18, - B00010011= 19, - B00010100= 20, - B00010101= 21, - B00010110= 22, - B00010111= 23, - B00011000= 24, - B00011001= 25, - B00011010= 26, - B00011011= 27, - B00011100= 28, - B00011101= 29, - B00011110= 30, - B00011111= 31, - B00100000= 32, - B00100001= 33, - B00100010= 34, - B00100011= 35, - B00100100= 36, - B00100101= 37, - B00100110= 38, - B00100111= 39, - B00101000= 40, - B00101001= 41, - B00101010= 42, - B00101011= 43, - B00101100= 44, - B00101101= 45, - B00101110= 46, - B00101111= 47, - B00110000= 48, - B00110001= 49, - B00110010= 50, - B00110011= 51, - B00110100= 52, - B00110101= 53, - B00110110= 54, - B00110111= 55, - B00111000= 56, - B00111001= 57, - B00111010= 58, - B00111011= 59, - B00111100= 60, - B00111101= 61, - B00111110= 62, - B00111111= 63, - B01000000= 64, - B01000001= 65, - B01000010= 66, - B01000011= 67, - B01000100= 68, - B01000101= 69, - B01000110= 70, - B01000111= 71, - B01001000= 72, - B01001001= 73, - B01001010= 74, - B01001011= 75, - B01001100= 76, - B01001101= 77, - B01001110= 78, - B01001111= 79, - B01010000= 80, - B01010001= 81, - B01010010= 82, - B01010011= 83, - B01010100= 84, - B01010101= 85, - B01010110= 86, - B01010111= 87, - B01011000= 88, - B01011001= 89, - B01011010= 90, - B01011011= 91, - B01011100= 92, - B01011101= 93, - B01011110= 94, - B01011111= 95, - B01100000= 96, - B01100001= 97, - B01100010= 98, - B01100011= 99, - B01100100= 100, - B01100101= 101, - B01100110= 102, - B01100111= 103, - B01101000= 104, - B01101001= 105, - B01101010= 106, - B01101011= 107, - B01101100= 108, - B01101101= 109, - B01101110= 110, - B01101111= 111, - B01110000= 112, - B01110001= 113, - B01110010= 114, - B01110011= 115, - 
B01110100= 116, - B01110101= 117, - B01110110= 118, - B01110111= 119, - B01111000= 120, - B01111001= 121, - B01111010= 122, - B01111011= 123, - B01111100= 124, - B01111101= 125, - B01111110= 126, - B01111111= 127, - B10000000= 128, - B10000001= 129, - B10000010= 130, - B10000011= 131, - B10000100= 132, - B10000101= 133, - B10000110= 134, - B10000111= 135, - B10001000= 136, - B10001001= 137, - B10001010= 138, - B10001011= 139, - B10001100= 140, - B10001101= 141, - B10001110= 142, - B10001111= 143, - B10010000= 144, - B10010001= 145, - B10010010= 146, - B10010011= 147, - B10010100= 148, - B10010101= 149, - B10010110= 150, - B10010111= 151, - B10011000= 152, - B10011001= 153, - B10011010= 154, - B10011011= 155, - B10011100= 156, - B10011101= 157, - B10011110= 158, - B10011111= 159, - B10100000= 160, - B10100001= 161, - B10100010= 162, - B10100011= 163, - B10100100= 164, - B10100101= 165, - B10100110= 166, - B10100111= 167, - B10101000= 168, - B10101001= 169, - B10101010= 170, - B10101011= 171, - B10101100= 172, - B10101101= 173, - B10101110= 174, - B10101111= 175, - B10110000= 176, - B10110001= 177, - B10110010= 178, - B10110011= 179, - B10110100= 180, - B10110101= 181, - B10110110= 182, - B10110111= 183, - B10111000= 184, - B10111001= 185, - B10111010= 186, - B10111011= 187, - B10111100= 188, - B10111101= 189, - B10111110= 190, - B10111111= 191, - B11000000= 192, - B11000001= 193, - B11000010= 194, - B11000011= 195, - B11000100= 196, - B11000101= 197, - B11000110= 198, - B11000111= 199, - B11001000= 200, - B11001001= 201, - B11001010= 202, - B11001011= 203, - B11001100= 204, - B11001101= 205, - B11001110= 206, - B11001111= 207, - B11010000= 208, - B11010001= 209, - B11010010= 210, - B11010011= 211, - B11010100= 212, - B11010101= 213, - B11010110= 214, - B11010111= 215, - B11011000= 216, - B11011001= 217, - B11011010= 218, - B11011011= 219, - B11011100= 220, - B11011101= 221, - B11011110= 222, - B11011111= 223, - B11100000= 224, - B11100001= 225, - B11100010= 226, - 
B11100011= 227, - B11100100= 228, - B11100101= 229, - B11100110= 230, - B11100111= 231, - B11101000= 232, - B11101001= 233, - B11101010= 234, - B11101011= 235, - B11101100= 236, - B11101101= 237, - B11101110= 238, - B11101111= 239, - B11110000= 240, - B11110001= 241, - B11110010= 242, - B11110011= 243, - B11110100= 244, - B11110101= 245, - B11110110= 246, - B11110111= 247, - B11111000= 248, - B11111001= 249, - B11111010= 250, - B11111011= 251, - B11111100= 252, - B11111101= 253, - B11111110= 254, - B11111111= 255 -}; diff --git a/plugins/GSdx_legacy/xbyak/xbyak_mnemonic.h b/plugins/GSdx_legacy/xbyak/xbyak_mnemonic.h deleted file mode 100644 index d551c61323..0000000000 --- a/plugins/GSdx_legacy/xbyak/xbyak_mnemonic.h +++ /dev/null @@ -1,1489 +0,0 @@ -/* Copyright (c) 2007 MITSUNARI Shigeo -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* -* Redistributions of source code must retain the above copyright notice, this -* list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, -* this list of conditions and the following disclaimer in the documentation -* and/or other materials provided with the distribution. -* Neither the name of the copyright owner nor the names of its contributors may -* be used to endorse or promote products derived from this software without -* specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -* ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -* THE POSSIBILITY OF SUCH DAMAGE. -*/ - -const char *getVersionString() const { return "4.84"; } -void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } -void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } -void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } -void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); } -void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); } -void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); } -void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); } -void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); } -void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); } -void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); } -void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); } -void punpckhwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x69); } -void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); } -void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); } -void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); } -void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); } -void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); } -void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); } -void pavgw(const Mmx& mmx, const Operand& op) { 
opMMX(mmx, op, 0xE3); } -void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); } -void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); } -void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); } -void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); } -void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); } -void paddq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD4); } -void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); } -void psubq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFB); } -void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); } -void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); } -void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); } -void paddsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEC); } -void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); } -void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); } -void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); } -void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); } -void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); } -void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); } -void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); } -void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); } -void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); } -void psllw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF1); } -void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); } -void psllq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF3); } -void psraw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE1); } -void psrad(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE2); } -void psrlw(const Mmx& mmx, const Operand& op) { 
opMMX(mmx, op, 0xD1); } -void psrld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD2); } -void psrlq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD3); } -void psubb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF8); } -void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); } -void psubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFA); } -void psubsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE8); } -void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); } -void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); } -void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); } -void psllw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); } -void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); } -void psllq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); } -void psraw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); } -void psrad(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); } -void psrlw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); } -void psrld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); } -void psrlq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); } -void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); } -void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); } -void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } -void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } -void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } -void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } -void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); } -void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); } -void 
movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); } -void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); } -void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); } -void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); } -void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); } -void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); } -void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); } -void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); } -void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); } -void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); } -void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); } -void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); } -void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); } -void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); } -void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); } -void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); } -void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); } -void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); } -void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); } -void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); } -void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); } -void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); } -void cmpps(const Xmm& xmm, const 
Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } -void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } -void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } -void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } -void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); } -void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); } -void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM); } -void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); } -void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); } -void maxss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM); } -void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); } -void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); } -void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM); } -void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); } -void minpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM); } -void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); } -void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); } -void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); } -void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); } -void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM); } -void orps(const Xmm& xmm, const Operand& op) { 
opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); } -void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); } -void rcpps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM); } -void rcpss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM); } -void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); } -void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); } -void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } -void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } -void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); } -void sqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM); } -void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); } -void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); } -void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); } -void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); } -void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); } -void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); } -void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); } -void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); } -void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); } -void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); } -void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, 
isXMM_XMMorMEM); } -void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); } -void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); } -void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); } -void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); } -void punpckhqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM); } -void punpcklqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM); } -void comiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM); } -void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); } -void comisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM); } -void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); } -void cvtpd2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM); } -void cvtps2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM); } -void cvtsd2ss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM); } -void cvtss2sd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM); } -void cvtpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM); } -void cvttpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM); } -void cvtdq2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM); } -void cvtps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM); } -void cvttps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM); } -void cvtdq2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM); } -void addsubpd(const Xmm& xmm, const Operand& 
op) { opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM); } -void addsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM); } -void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); } -void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); } -void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); } -void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); } -void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM); } -void movshdup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM); } -void movsldup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM); } -void cvtpi2ps(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM); } -void cvtps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM); } -void cvtsi2ss(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM); } -void cvtss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM); } -void cvttps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM); } -void cvttss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM); } -void cvtpi2pd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM); } -void cvtpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM); } -void cvtsi2sd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM); } -void cvtsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM); } -void cvttpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM); } -void cvttsd2si(const Operand& 
reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM); } -void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B00011000); } -void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, B00011000); } -void prefetcht2(const Address& addr) { opModM(addr, Reg32(3), 0x0F, B00011000); } -void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, B00011000); } -void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); } -void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); } -void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); } -void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); } -void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); } -void jo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); } -void jo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); } -void seto(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 0); } -void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); } -void jno(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); } -void jno(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); } -void setno(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 1); } -void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } -void jb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void jb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } -void cmovc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, 
op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } -void jc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void jc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void setc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } -void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } -void jnae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void jnae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } -void setnae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } -void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } -void jnb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void jnb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void setnb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); } -void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } -void jae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void jae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); } -void cmovnc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } -void jnc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void jnc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } -void setnc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); } -void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 
| 4); } -void je(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } -void je(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } -void sete(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); } -void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); } -void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } -void jz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } -void setz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); } -void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); } -void jne(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } -void jne(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } -void setne(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); } -void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); } -void jnz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } -void jnz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } -void setnz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); } -void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); } -void jbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } -void jbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } -void setbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); } -void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); } -void jna(std::string label, LabelType type 
= T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } -void jna(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } -void setna(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); } -void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); } -void jnbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } -void jnbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } -void setnbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); } -void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); } -void ja(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } -void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } -void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); } -void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); } -void js(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); } -void js(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); } -void sets(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 8); } -void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); } -void jns(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); } -void jns(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); } -void setns(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 9); } -void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); } -void jp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); 
} -void jp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } -void setp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); } -void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); } -void jpe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } -void jpe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } -void setpe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); } -void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); } -void jnp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } -void jnp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } -void setnp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); } -void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); } -void jpo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } -void jpo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } -void setpo(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); } -void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); } -void jl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } -void jl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } -void setl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); } -void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); } -void jnge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } -void jnge(const Label& label, 
LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } -void setnge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); } -void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); } -void jnl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } -void jnl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } -void setnl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); } -void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); } -void jge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } -void jge(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } -void setge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); } -void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); } -void jle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } -void jle(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } -void setle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); } -void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); } -void jng(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } -void jng(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } -void setng(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); } -void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); } -void jnle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } -void jnle(const Label& label, LabelType type = T_AUTO) { 
opJmp(label, type, 0x7F, 0x8F, 0x0F); } -void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } -void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); } -void jg(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } -void jg(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } -void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } -#ifdef XBYAK32 -void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jecxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jecxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } -#else -void jecxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jecxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } -void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } -#endif -#ifdef XBYAK64 -void cdqe() { db(0x48); db(0x98); } -void cqo() { db(0x48); db(0x99); } -#else -void aaa() { db(0x37); } -void aad() { db(0xD5); db(0x0A); } -void aam() { db(0xD4); db(0x0A); } -void aas() { db(0x3F); } -void daa() { db(0x27); } -void das() { db(0x2F); } -void popad() { db(0x61); } -void popfd() { db(0x9D); } -void pusha() { db(0x60); } -void pushad() { db(0x60); } -void pushfd() { db(0x9C); } -void popa() { db(0x61); } -#endif -void cbw() { db(0x66); db(0x98); } -void cdq() { db(0x99); } -void clc() { db(0xF8); } -void cld() { db(0xFC); } -void cli() { db(0xFA); } -void cmc() { db(0xF5); } -void cpuid() { db(0x0F); db(0xA2); } -void cwd() { db(0x66); db(0x99); } -void cwde() { db(0x98); } -void lahf() { db(0x9F); } -void lock() { db(0xF0); } -void nop() { db(0x90); } -void sahf() { db(0x9E); } -void stc() { db(0xF9); } -void 
std() { db(0xFD); } -void sti() { db(0xFB); } -void emms() { db(0x0F); db(0x77); } -void pause() { db(0xF3); db(0x90); } -void sfence() { db(0x0F); db(0xAE); db(0xF8); } -void lfence() { db(0x0F); db(0xAE); db(0xE8); } -void mfence() { db(0x0F); db(0xAE); db(0xF0); } -void monitor() { db(0x0F); db(0x01); db(0xC8); } -void mwait() { db(0x0F); db(0x01); db(0xC9); } -void rdmsr() { db(0x0F); db(0x32); } -void rdpmc() { db(0x0F); db(0x33); } -void rdtsc() { db(0x0F); db(0x31); } -void rdtscp() { db(0x0F); db(0x01); db(0xF9); } -void ud2() { db(0x0F); db(0x0B); } -void wait() { db(0x9B); } -void fwait() { db(0x9B); } -void wbinvd() { db(0x0F); db(0x09); } -void wrmsr() { db(0x0F); db(0x30); } -void xlatb() { db(0xD7); } -void popf() { db(0x9D); } -void pushf() { db(0x9C); } -void stac() { db(0x0F); db(0x01); db(0xCB); } -void vzeroall() { db(0xC5); db(0xFC); db(0x77); } -void vzeroupper() { db(0xC5); db(0xF8); db(0x77); } -void xgetbv() { db(0x0F); db(0x01); db(0xD0); } -void f2xm1() { db(0xD9); db(0xF0); } -void fabs() { db(0xD9); db(0xE1); } -void faddp() { db(0xDE); db(0xC1); } -void fchs() { db(0xD9); db(0xE0); } -void fcom() { db(0xD8); db(0xD1); } -void fcomp() { db(0xD8); db(0xD9); } -void fcompp() { db(0xDE); db(0xD9); } -void fcos() { db(0xD9); db(0xFF); } -void fdecstp() { db(0xD9); db(0xF6); } -void fdivp() { db(0xDE); db(0xF9); } -void fdivrp() { db(0xDE); db(0xF1); } -void fincstp() { db(0xD9); db(0xF7); } -void finit() { db(0x9B); db(0xDB); db(0xE3); } -void fninit() { db(0xDB); db(0xE3); } -void fld1() { db(0xD9); db(0xE8); } -void fldl2t() { db(0xD9); db(0xE9); } -void fldl2e() { db(0xD9); db(0xEA); } -void fldpi() { db(0xD9); db(0xEB); } -void fldlg2() { db(0xD9); db(0xEC); } -void fldln2() { db(0xD9); db(0xED); } -void fldz() { db(0xD9); db(0xEE); } -void fmulp() { db(0xDE); db(0xC9); } -void fnop() { db(0xD9); db(0xD0); } -void fpatan() { db(0xD9); db(0xF3); } -void fprem() { db(0xD9); db(0xF8); } -void fprem1() { db(0xD9); db(0xF5); } -void fptan() { 
db(0xD9); db(0xF2); } -void frndint() { db(0xD9); db(0xFC); } -void fscale() { db(0xD9); db(0xFD); } -void fsin() { db(0xD9); db(0xFE); } -void fsincos() { db(0xD9); db(0xFB); } -void fsqrt() { db(0xD9); db(0xFA); } -void fsubp() { db(0xDE); db(0xE9); } -void fsubrp() { db(0xDE); db(0xE1); } -void ftst() { db(0xD9); db(0xE4); } -void fucom() { db(0xDD); db(0xE1); } -void fucomp() { db(0xDD); db(0xE9); } -void fucompp() { db(0xDA); db(0xE9); } -void fxam() { db(0xD9); db(0xE5); } -void fxch() { db(0xD9); db(0xC9); } -void fxtract() { db(0xD9); db(0xF4); } -void fyl2x() { db(0xD9); db(0xF1); } -void fyl2xp1() { db(0xD9); db(0xF9); } -void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } -void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } -void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); } -void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); } -void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } -void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } -#ifndef XBYAK_NO_OP_NAMES -void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } -void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } -#endif -void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); } -void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); } -void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } -void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } -#ifndef XBYAK_NO_OP_NAMES -void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } -void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } -#endif -void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } -void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } -void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } -void 
sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } -void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } -void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } -#ifndef XBYAK_NO_OP_NAMES -void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } -void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } -#endif -void dec(const Operand& op) { opIncDec(op, 0x48, 1); } -void inc(const Operand& op) { opIncDec(op, 0x40, 0); } -void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xa3); } -void bt(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba); db(imm); } -void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xab); } -void bts(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba); db(imm); } -void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xb3); } -void btr(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba); db(imm); } -void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xbb); } -void btc(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba); db(imm); } -void div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); } -void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); } -void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); } -void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); } -void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); } -void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } -#ifndef XBYAK_NO_OP_NAMES -void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } -#endif -void rcl(const Operand& op, int imm) { opShift(op, imm, 2); } -void rcl(const Operand& op, const 
Reg8& _cl) { opShift(op, _cl, 2); } -void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } -void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); } -void rol(const Operand& op, int imm) { opShift(op, imm, 0); } -void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); } -void ror(const Operand& op, int imm) { opShift(op, imm, 1); } -void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); } -void sar(const Operand& op, int imm) { opShift(op, imm, 7); } -void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); } -void shl(const Operand& op, int imm) { opShift(op, imm, 4); } -void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } -void shr(const Operand& op, int imm) { opShift(op, imm, 5); } -void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); } -void sal(const Operand& op, int imm) { opShift(op, imm, 4); } -void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } -void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); } -void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); } -void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); } -void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); } -void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); } -void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); } -void popcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); } -void tzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); } -void lzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } -void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); } -void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); } -void 
phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); } -void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); } -void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); } -void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); } -void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); } -void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); } -void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); } -void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); } -void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); } -void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); } -void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); } -void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); } -void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); } -void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); } -void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxbw(const Xmm& xmm, 
const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, 
op, 0x3E, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aesdec(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDE, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void roundps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x08, 
0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void roundpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } -void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } -void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); } -void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } -void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); } -void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } -void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } -void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); } -void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); } -void movntpd(const Address& addr, const Xmm& 
reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); } -void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); } -void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); } -void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); } -void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); } -void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); } -void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); } -void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); } -void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); } -void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); } -void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); } -void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); } -void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); } -void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); } -void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); } -void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); } -void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); } -void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); } -void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); } -void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); } -void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); } -void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); } -void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); } -void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); } -void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); } -void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); } -void 
fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); } -void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); } -void fadd(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); } -void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } -void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } -void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } -void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } -void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); } -void fcmove(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); } -void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); } -void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } -void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); } -void fcmovu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); } -void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); } -void fcmovnb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); } -void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); } -void fcmovne(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); } -void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); } -void fcmovnbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); } -void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); } -void fcmovnu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); } -void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); } -void fcomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); } -void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); } -void fcomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); } 
-void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); } -void fucomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); } -void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); } -void fucomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); } -void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); } -void fdiv(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); } -void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); } -void fdivp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); } -void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); } -void fdivr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); } -void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); } -void fdivrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); } -void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); } -void fmul(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); } -void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } -void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } -void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } -void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } -void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); } -void fsubp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); } -void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); } -void fsubr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); } -void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); } -void fsubrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); } -void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); } -void fcomp(const Fpu& reg) { opFpu(reg, 
0xD8, 0xD8); } -void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); } -void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); } -void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); } -void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } -void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); } -void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); } -void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } -void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x58, true); } -void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x58, true); } -void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x58, false); } -void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x58, false); } -void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5C, true); } -void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5C, true); } -void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5C, false); } -void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5C, false); } -void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x59, true); } -void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x59, true); } -void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x59, false); } -void vmulss(const Xmm& xmm, const Operand& op1, const Operand& 
op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x59, false); } -void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5E, true); } -void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5E, true); } -void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5E, false); } -void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5E, false); } -void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5F, true); } -void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5F, true); } -void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5F, false); } -void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5F, false); } -void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5D, true); } -void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5D, true); } -void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5D, false); } -void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5D, false); } -void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x54, true); } -void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, 
op2, MM_0F, 0x54, true); } -void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x55, true); } -void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x55, true); } -void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x56, true); } -void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x56, true); } -void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x57, true); } -void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x57, true); } -void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0D, true, 0); db(imm); } -void vblendpd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0D, true, 0); db(imm); } -void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0C, true, 0); db(imm); } -void vblendps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0C, true, 0); db(imm); } -void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); } -void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); } -void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } -void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } -void vmpsadbw(const Xmm& x1, const Xmm& 
x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } -void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } -void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } -void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } -void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } -void vpblendd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } -void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } -void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } -void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); } -void vroundss(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); } -void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); } -void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); } -void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0C, true, 0); } -void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0D, true, 0); } -void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, 
MM_0F38 | PP_66, 0x47, true, 0); } -void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x47, true, 1); } -void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x46, true, 0); } -void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x45, true, 0); } -void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x45, true, 1); } -void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } -void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } -void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0xC2, true, -1); db(imm); } -void vcmpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC2, true, -1); db(imm); } -void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0xC2, false, -1); db(imm); } -void vcmpsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0xC2, false, -1); db(imm); } -void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0xC2, false, -1); db(imm); } -void vcmpss(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0xC2, false, -1); db(imm); } -void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x5A, false, -1); } -void vcvtsd2ss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x5A, false, -1); } -void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x5A, false, -1); } -void 
vcvtss2sd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x5A, false, -1); } -void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } -void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } -void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x63, true, -1); } -void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, true, -1); } -void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6B, true, -1); } -void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, true, -1); } -void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x67, true, -1); } -void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, true, -1); } -void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x2B, true, -1); } -void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, true, -1); } -void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFC, true, -1); } -void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, true, -1); } -void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFD, true, -1); } -void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, true, -1); } -void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFE, true, -1); } -void vpaddd(const Xmm& xmm, const Operand& op) { 
opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, true, -1); } -void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD4, true, -1); } -void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, true, -1); } -void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEC, true, -1); } -void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, true, -1); } -void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xED, true, -1); } -void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, true, -1); } -void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDC, true, -1); } -void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, true, -1); } -void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDD, true, -1); } -void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, true, -1); } -void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } -void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } -void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDB, true, -1); } -void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, true, -1); } -void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDF, true, -1); } -void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, true, -1); } -void 
vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE0, true, -1); } -void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, true, -1); } -void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE3, true, -1); } -void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, true, -1); } -void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x74, true, -1); } -void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, true, -1); } -void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x75, true, -1); } -void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, true, -1); } -void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x76, true, -1); } -void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, true, -1); } -void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x29, true, -1); } -void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, true, -1); } -void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x64, true, -1); } -void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, true, -1); } -void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x65, true, -1); } -void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, true, -1); } -void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 
0x66, true, -1); } -void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, true, -1); } -void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x37, true, -1); } -void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, true, -1); } -void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x01, true, -1); } -void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, true, -1); } -void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x02, true, -1); } -void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, true, -1); } -void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x03, true, -1); } -void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, true, -1); } -void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x05, true, -1); } -void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, true, -1); } -void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x06, true, -1); } -void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, true, -1); } -void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x07, true, -1); } -void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, true, -1); } -void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF5, true, -1); } -void vpmaddwd(const Xmm& xmm, const Operand& op) { 
opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, true, -1); } -void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x04, true, -1); } -void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, true, -1); } -void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3C, true, -1); } -void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, true, -1); } -void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEE, true, -1); } -void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, true, -1); } -void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3D, true, -1); } -void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, true, -1); } -void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDE, true, -1); } -void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, true, -1); } -void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3E, true, -1); } -void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, true, -1); } -void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3F, true, -1); } -void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, true, -1); } -void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x38, true, -1); } -void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, true, -1); } -void vpminsw(const 
Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEA, true, -1); } -void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, true, -1); } -void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x39, true, -1); } -void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, true, -1); } -void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDA, true, -1); } -void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, true, -1); } -void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3A, true, -1); } -void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, true, -1); } -void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3B, true, -1); } -void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, true, -1); } -void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE4, true, -1); } -void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, true, -1); } -void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0B, true, -1); } -void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, true, -1); } -void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE5, true, -1); } -void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, true, -1); } -void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 
0xD5, true, -1); } -void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, true, -1); } -void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x40, true, -1); } -void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, true, -1); } -void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF4, false, -1); } -void vpmuludq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF4, false, -1); } -void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x28, true, -1); } -void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, true, -1); } -void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEB, true, -1); } -void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, true, -1); } -void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF6, true, -1); } -void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, true, -1); } -void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x00, true, -1); } -void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x08, true, -1); } -void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, true, -1); } -void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x09, true, -1); } -void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, true, -1); } -void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { 
opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0A, true, -1); } -void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, true, -1); } -void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF1, true, -1); } -void vpsllw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF1, true, -1); } -void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF2, true, -1); } -void vpslld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF2, true, -1); } -void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF3, true, -1); } -void vpsllq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF3, true, -1); } -void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE1, true, -1); } -void vpsraw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE1, true, -1); } -void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE2, true, -1); } -void vpsrad(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE2, true, -1); } -void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD1, true, -1); } -void vpsrlw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD1, true, -1); } -void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD2, true, -1); } -void vpsrld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD2, true, -1); } -void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD3, true, -1); } -void vpsrlq(const Xmm& xmm, const Operand& op) { 
opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD3, true, -1); } -void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF8, true, -1); } -void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, true, -1); } -void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF9, true, -1); } -void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, true, -1); } -void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFA, true, -1); } -void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, true, -1); } -void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFB, true, -1); } -void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, true, -1); } -void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE8, true, -1); } -void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, true, -1); } -void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE9, true, -1); } -void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, true, -1); } -void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD8, true, -1); } -void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, true, -1); } -void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD9, true, -1); } -void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, true, -1); } -void vpunpckhbw(const Xmm& x1, const Xmm& x2, const 
Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x68, true, -1); } -void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, true, -1); } -void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x69, true, -1); } -void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, true, -1); } -void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6A, true, -1); } -void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, true, -1); } -void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6D, true, -1); } -void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, true, -1); } -void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x60, true, -1); } -void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, true, -1); } -void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x61, true, -1); } -void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, true, -1); } -void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x62, true, -1); } -void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, true, -1); } -void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6C, true, -1); } -void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, true, -1); } -void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEF, 
true, -1); } -void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, true, -1); } -void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x53, false, -1); } -void vrcpss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x53, false, -1); } -void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x52, false, -1); } -void vrsqrtss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x52, false, -1); } -void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xC6, true, -1); db(imm); } -void vshufpd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC6, true, -1); db(imm); } -void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0xC6, true, -1); db(imm); } -void vshufps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC6, true, -1); db(imm); } -void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x51, false, -1); } -void vsqrtsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x51, false, -1); } -void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x51, false, -1); } -void vsqrtss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x51, false, -1); } -void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x15, true, -1); } -void vunpckhpd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x15, true, -1); } -void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0x15, true, -1); } -void vunpckhps(const Xmm& 
xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x15, true, -1); } -void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x14, true, -1); } -void vunpcklpd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x14, true, -1); } -void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0x14, true, -1); } -void vunpcklps(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x14, true, -1); } -void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0xDF, false, 0, imm); } -void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x09, true, 0, imm); } -void vroundps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x08, true, 0, imm); } -void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x05, true, 0, imm); } -void vpermilps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x04, true, 0, imm); } -void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x61, false, 0, imm); } -void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x60, false, 0, imm); } -void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x63, false, 0, imm); } -void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x62, false, 0, imm); } -void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0E, true, 0); } -void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0F, true, 0); } -void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 
0x2F, false, -1); } -void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x2F, false, -1); } -void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x5B, true, -1); } -void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x5B, true, -1); } -void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x5B, true, -1); } -void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x28, true, -1); } -void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x28, true, -1); } -void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x12, true, -1); } -void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x6F, true, -1); } -void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x6F, true, -1); } -void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x16, true, -1); } -void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); } -void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); } -void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); } -void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); } -void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); } -void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); } -void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); } -void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); } -void vpmovsxbd(const Xmm& xm, const Operand& op) { 
opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, true, -1); } -void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, true, -1); } -void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, true, -1); } -void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, true, -1); } -void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, true, -1); } -void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, true, -1); } -void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, true, -1); } -void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, true, -1); } -void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, true, -1); } -void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, true, -1); } -void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, true, -1); } -void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); } -void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); } -void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); } -void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, true, -1); } -void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); } -void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); } -void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); } -void vsqrtps(const Xmm& xm, const Operand& 
op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x51, true, -1); } -void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x2E, false, -1); } -void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x2E, false, -1); } -void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x29, true, -1); } -void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x29, true, -1); } -void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x7F, true, -1); } -void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_F3, 0x7F, true, -1); } -void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x11, true, -1); } -void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x11, true, -1); } -void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0xD0, true, -1); } -void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0xD0, true, -1); } -void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7C, true, -1); } -void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7C, true, -1); } -void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7D, true, -1); } -void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7D, true, -1); } -void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDC, false, 0); } -void vaesenclast(const Xmm& xmm, const 
Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDD, false, 0); } -void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDE, false, 0); } -void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDF, false, 0); } -void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x2C, true, 0); } -void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x2E, true, 0); } -void vmaskmovpd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x2D, true, 0); } -void vmaskmovpd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x2F, true, 0); } -void vpmaskmovd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x8C, true, 0); } -void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x8E, true, 0); } -void vpmaskmovq(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x8C, true, 1); } -void vpmaskmovq(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x8E, true, 1); } -void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x36, true, 0); } -void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x16, true, 0); } -void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x00, true, 1, imm); } -void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x01, true, 1, imm); } -void cmpeqpd(const Xmm& x, const 
Operand& op) { cmppd(x, op, 0); } -void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); } -void vcmpeqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 0); } -void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); } -void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); } -void vcmpltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 1); } -void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); } -void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); } -void vcmplepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 2); } -void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } -void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); } -void vcmpunordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 3); } -void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); } -void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); } -void vcmpneqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 4); } -void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); } -void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); } -void vcmpnltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 5); } -void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); } -void vcmpnlepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 6); } -void vcmpnlepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 6); } -void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } -void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); } -void vcmpordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 7); } -void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); } -void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 8); } -void 
vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); } -void vcmpngepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 9); } -void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); } -void vcmpngtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 10); } -void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); } -void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 11); } -void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); } -void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 12); } -void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); } -void vcmpgepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 13); } -void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); } -void vcmpgtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 14); } -void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); } -void vcmptruepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 15); } -void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } -void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 16); } -void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); } -void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 17); } -void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); } -void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 18); } -void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); } -void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 19); } -void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); } -void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 
20); } -void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); } -void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 21); } -void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); } -void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 22); } -void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); } -void vcmpord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 23); } -void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); } -void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 24); } -void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); } -void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 25); } -void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); } -void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 26); } -void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); } -void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 27); } -void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 28); } -void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 28); } -void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); } -void vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 29); } -void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); } -void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 30); } -void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } -void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 31); } -void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } -void vcmpeqps(const Xmm& x1, const Xmm& x2, 
const Operand& op) { vcmpps(x1, x2, op, 0); } -void vcmpeqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 0); } -void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); } -void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); } -void vcmpltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 1); } -void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); } -void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); } -void vcmpleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 2); } -void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } -void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); } -void vcmpunordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 3); } -void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); } -void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); } -void vcmpneqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 4); } -void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); } -void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); } -void vcmpnltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 5); } -void cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); } -void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); } -void vcmpnleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 6); } -void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } -void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } -void vcmpordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 7); } -void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); } -void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 8); } -void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); } 
-void vcmpngeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 9); } -void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); } -void vcmpngtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 10); } -void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); } -void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpps(x, op, 11); } -void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); } -void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 12); } -void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); } -void vcmpgeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 13); } -void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); } -void vcmpgtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 14); } -void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); } -void vcmptrueps(const Xmm& x, const Operand& op) { vcmpps(x, op, 15); } -void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } -void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 16); } -void vcmplt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 17); } -void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 17); } -void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 18); } -void vcmple_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 18); } -void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); } -void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 19); } -void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); } -void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 20); } -void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { 
vcmpps(x1, x2, op, 21); } -void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 21); } -void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); } -void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 22); } -void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); } -void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 23); } -void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); } -void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 24); } -void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); } -void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 25); } -void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); } -void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 26); } -void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); } -void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 27); } -void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); } -void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 28); } -void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); } -void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 29); } -void vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); } -void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 30); } -void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } -void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 31); } -void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); } -void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); } -void vcmpeqsd(const Xmm& x, 
const Operand& op) { vcmpsd(x, op, 0); } -void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); } -void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); } -void vcmpltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 1); } -void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); } -void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); } -void vcmplesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 2); } -void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } -void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); } -void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 3); } -void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); } -void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); } -void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 4); } -void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); } -void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); } -void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 5); } -void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); } -void vcmpnlesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); } -void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 6); } -void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } -void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } -void vcmpordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 7); } -void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); } -void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 8); } -void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); } -void vcmpngesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 9); } 
-void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); } -void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 10); } -void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); } -void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 11); } -void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); } -void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 12); } -void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); } -void vcmpgesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 13); } -void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); } -void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 14); } -void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); } -void vcmptruesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 15); } -void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } -void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 16); } -void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); } -void vcmplt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 17); } -void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); } -void vcmple_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 18); } -void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); } -void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 19); } -void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); } -void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 20); } -void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); } -void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { 
vcmpsd(x, op, 21); } -void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); } -void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 22); } -void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); } -void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 23); } -void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); } -void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 24); } -void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); } -void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 25); } -void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); } -void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 26); } -void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); } -void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 27); } -void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); } -void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 28); } -void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 29); } -void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 29); } -void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 30); } -void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 30); } -void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } -void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 31); } -void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); } -void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); } -void vcmpeqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 0); } -void cmpltss(const Xmm& x, const 
Operand& op) { cmpss(x, op, 1); } -void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); } -void vcmpltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 1); } -void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); } -void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); } -void vcmpless(const Xmm& x, const Operand& op) { vcmpss(x, op, 2); } -void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); } -void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); } -void vcmpunordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 3); } -void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); } -void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); } -void vcmpneqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 4); } -void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); } -void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); } -void vcmpnltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 5); } -void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); } -void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); } -void vcmpnless(const Xmm& x, const Operand& op) { vcmpss(x, op, 6); } -void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } -void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } -void vcmpordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 7); } -void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); } -void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 8); } -void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); } -void vcmpngess(const Xmm& x, const Operand& op) { vcmpss(x, op, 9); } -void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, 
x2, op, 10); } -void vcmpngtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 10); } -void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); } -void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpss(x, op, 11); } -void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); } -void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 12); } -void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); } -void vcmpgess(const Xmm& x, const Operand& op) { vcmpss(x, op, 13); } -void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); } -void vcmpgtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 14); } -void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); } -void vcmptruess(const Xmm& x, const Operand& op) { vcmpss(x, op, 15); } -void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); } -void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 16); } -void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); } -void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 17); } -void vcmple_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); } -void vcmple_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 18); } -void vcmpunord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 19); } -void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 19); } -void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); } -void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 20); } -void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); } -void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 21); } -void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const 
Operand& op) { vcmpss(x1, x2, op, 22); } -void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 22); } -void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); } -void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 23); } -void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); } -void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 24); } -void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); } -void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 25); } -void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); } -void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 26); } -void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); } -void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 27); } -void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); } -void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 28); } -void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); } -void vcmpge_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 29); } -void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); } -void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 30); } -void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); } -void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 31); } -void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); } -void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); } -void vmovhps(const Xmm& x, 
const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); } -void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x17, false); } -void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x12, false); } -void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x13, false); } -void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F, 0x12, false); } -void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x13, false); } -void vfmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 1); } -void vfmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 1); } -void vfmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 1); } -void vfmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 0); } -void vfmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 0); } -void vfmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 0); } -void vfmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 1); } -void vfmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = 
Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 1); } -void vfmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 1); } -void vfmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 0); } -void vfmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 0); } -void vfmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 0); } -void vfmaddsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 1); } -void vfmaddsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 1); } -void vfmaddsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 1); } -void vfmaddsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 0); } -void vfmaddsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 0); } -void vfmaddsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 0); } -void vfmsubadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 1); } -void vfmsubadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 1); } -void vfmsubadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | 
PP_66, 0xB7, true, 1); } -void vfmsubadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 0); } -void vfmsubadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 0); } -void vfmsubadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB7, true, 0); } -void vfmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 1); } -void vfmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 1); } -void vfmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 1); } -void vfmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 0); } -void vfmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 0); } -void vfmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 0); } -void vfmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9B, false, 1); } -void vfmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 1); } -void vfmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 1); } -void vfmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9B, false, 0); } -void vfmsub213ss(const Xmm& xmm, const Xmm& op1, 
const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 0); } -void vfmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 0); } -void vfnmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 1); } -void vfnmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 1); } -void vfnmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 1); } -void vfnmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 0); } -void vfnmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 0); } -void vfnmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 0); } -void vfnmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 1); } -void vfnmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 1); } -void vfnmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 1); } -void vfnmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 0); } -void vfnmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 0); } -void vfnmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 
| PP_66, 0xBD, false, 0); } -void vfnmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 1); } -void vfnmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 1); } -void vfnmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 1); } -void vfnmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 0); } -void vfnmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 0); } -void vfnmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 0); } -void vfnmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 1); } -void vfnmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 1); } -void vfnmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 1); } -void vfnmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 0); } -void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 0); } -void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); } -void vaesimc(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0xDB, false, 0); } -void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, 
MM_0F38 | PP_66, 0x1A, true, 0); } -void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x5A, true, 0); } -void vbroadcastsd(const Ymm& y, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, MM_0F38 | PP_66, 0x19, true, 0); } -void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x18, true, 0); } -void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x78, true, 0); } -void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x79, true, 0); } -void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x58, true, 0); } -void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x59, true, 0); } -void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x19, true, 0); db(imm); } -void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x39, true, 0); db(imm); } -void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!(op.isREG(32) || op.isMEM()) || x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? 
xm0 : ym0, op, op.isREG(), Operand::XMM, MM_0F3A | PP_66, 0x17, false, 0); db(imm); } -void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x18, true, 0); db(imm); } -void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x38, true, 0); db(imm); } -void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0); db(imm); } -void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x46, true, 0); db(imm); } -void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_F2, 0xF0, true, 0); } -void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1); } -void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); } -void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); } -void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x14, false); db(imm); } -void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 
1 : 0); db(imm); } -void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); } -void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 0); db(imm); } -void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } -void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } -void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } -void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } -void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } -void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } -void vpmovmskb(const Reg32e& r, const Xmm& x) { bool isYMM= x.isYMM(); opAVX_X_X_XM(isYMM ? Ymm(r.getIdx()) : Xmm(r.getIdx()), isYMM ? 
ym0 : xm0, x, MM_0F | PP_66, 0xD7, true); } -void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym7 : xm7, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } -void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym7 : xm7, x, x, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym3 : xm3, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym3 : xm3, x, x, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } -void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x71, true); db(imm); } -void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } -void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x72, true); db(imm); } -void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } -void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x71, true); db(imm); } -void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } -void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x72, true); db(imm); } -void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? 
ym2 : xm2, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } -void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x71, true); db(imm); } -void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } -void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x72, true); db(imm); } -void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } -void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x73, true); db(imm); } -void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } -void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } -void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); } -void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); } -void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4C, true); db(x4.getIdx() << 4); } -void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4C, true); db(x4.getIdx() << 4); } -void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 0); } -void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); } -void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); } -void vmovd(const Address& 
addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); } -void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } -void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } -void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); } -void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false); } -void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false); } -void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F | PP_66, 0x50, true, 0); } -void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F, 0x50, true, 0); } -void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); } -void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); } -void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); } -void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? 
xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); } -void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); } -void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); } -void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); } -void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x10, false); } -void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x10, false); } -void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x11, false); } -void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 0); } -void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); } -void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); } -void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); } -void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 
0 : 1); } -void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } -void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F, 0x5A, true); } -void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F | PP_F3, 0xE6, true); } -void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); } -void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); } -void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? 
ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); } -void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, NULL, &op, MM_0F38 | PP_66, 0x13, 0); } -void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, NULL, &op, MM_0F3A | PP_66, 0x1d, 0); db(imm); } -#ifdef XBYAK64 -void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); } -void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); } -void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 1); db(imm); } -void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } -void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } -void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 1); } -void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 1); } -void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); } -void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); } -#endif -void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38, 0xf2, true); } -void mulx(const Reg32e& 
r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf6, true); } -void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf5, true); } -void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F3, 0xf5, true); } -void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf7, false); } -void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf5, false); } -void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F3, 0xf7, false); } -void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_66, 0xf7, false); } -void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F2, 0xf7, false); } -void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); } -void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); } -void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); } -void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1, 0); } -void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1, 1); } -void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0, 1); } -void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0, 2); } -void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0, 1); } -void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0, 2); } -void 
vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1, 0); } -void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1, 1); } diff --git a/plugins/GSdx_legacy/xbyak/xbyak_util.h b/plugins/GSdx_legacy/xbyak/xbyak_util.h deleted file mode 100644 index 22f0371f2b..0000000000 --- a/plugins/GSdx_legacy/xbyak/xbyak_util.h +++ /dev/null @@ -1,561 +0,0 @@ -/* Copyright (c) 2007 MITSUNARI Shigeo -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* -* Redistributions of source code must retain the above copyright notice, this -* list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, -* this list of conditions and the following disclaimer in the documentation -* and/or other materials provided with the distribution. -* Neither the name of the copyright owner nor the names of its contributors may -* be used to endorse or promote products derived from this software without -* specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -* ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -* THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef XBYAK_XBYAK_UTIL_H_ -#define XBYAK_XBYAK_UTIL_H_ - -/** - utility class and functions for Xbyak - Xbyak::util::Clock ; rdtsc timer - Xbyak::util::Cpu ; detect CPU - @note this header is UNDER CONSTRUCTION! -*/ -#include "xbyak.h" - -#ifdef _MSC_VER - #if (_MSC_VER < 1400) && defined(XBYAK32) - static inline __declspec(naked) void __cpuid(int[4], int) - { - __asm { - push ebx - push esi - mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn - cpuid - mov esi, dword ptr [esp + 4 * 2 + 4] // data - mov dword ptr [esi], eax - mov dword ptr [esi + 4], ebx - mov dword ptr [esi + 8], ecx - mov dword ptr [esi + 12], edx - pop esi - pop ebx - ret - } - } - #else - #include // for __cpuid - #endif -#else - #ifndef __GNUC_PREREQ - #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor))) - #endif - #if __GNUC_PREREQ(4, 3) && !defined(__APPLE__) - #include - #else - #if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm' - #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) - #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) - #else - #define __cpuid(eaxIn, a, b, c, d) __asm__ 
__volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) - #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) - #endif - #endif -#endif - -#ifdef _MSC_VER -extern "C" unsigned __int64 __xgetbv(int); -#endif - -namespace Xbyak { namespace util { - -/** - CPU detection class -*/ -class Cpu { - uint64 type_; - unsigned int get32bitAsBE(const char *x) const - { - return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24); - } - unsigned int mask(int n) const - { - return (1U << n) - 1; - } - void setFamily() - { - unsigned int data[4]; - getCpuid(1, data); - stepping = data[0] & mask(4); - model = (data[0] >> 4) & mask(4); - family = (data[0] >> 8) & mask(4); - // type = (data[0] >> 12) & mask(2); - extModel = (data[0] >> 16) & mask(4); - extFamily = (data[0] >> 20) & mask(8); - if (family == 0x0f) { - displayFamily = family + extFamily; - } else { - displayFamily = family; - } - if (family == 6 || family == 0x0f) { - displayModel = (extModel << 4) + model; - } else { - displayModel = model; - } - } -public: - int model; - int family; - int stepping; - int extModel; - int extFamily; - int displayFamily; // family + extFamily - int displayModel; // model + extModel - static inline void getCpuid(unsigned int eaxIn, unsigned int data[4]) - { -#ifdef _MSC_VER - __cpuid(reinterpret_cast(data), eaxIn); -#else - __cpuid(eaxIn, data[0], data[1], data[2], data[3]); -#endif - } - static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4]) - { -#ifdef _MSC_VER - __cpuidex(reinterpret_cast(data), eaxIn, ecxIn); -#else - __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]); -#endif - } - static inline uint64 getXfeature() - { -#ifdef _MSC_VER - return __xgetbv(0); -#else - unsigned int eax, edx; - // xgetvb is not support on gcc 4.2 -// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); - __asm__ volatile(".byte 0x0f, 0x01, 
0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); - return ((uint64)edx << 32) | eax; -#endif - } - typedef uint64 Type; - static const Type NONE = 0; - static const Type tMMX = 1 << 0; - static const Type tMMX2 = 1 << 1; - static const Type tCMOV = 1 << 2; - static const Type tSSE = 1 << 3; - static const Type tSSE2 = 1 << 4; - static const Type tSSE3 = 1 << 5; - static const Type tSSSE3 = 1 << 6; - static const Type tSSE41 = 1 << 7; - static const Type tSSE42 = 1 << 8; - static const Type tPOPCNT = 1 << 9; - static const Type tAESNI = 1 << 10; - static const Type tSSE5 = 1 << 11; - static const Type tOSXSAVE = 1 << 12; - static const Type tPCLMULQDQ = 1 << 13; - static const Type tAVX = 1 << 14; - static const Type tFMA = 1 << 15; - - static const Type t3DN = 1 << 16; - static const Type tE3DN = 1 << 17; - static const Type tSSE4a = 1 << 18; - static const Type tRDTSCP = 1 << 19; - static const Type tAVX2 = 1 << 20; - static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt - static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx - static const Type tLZCNT = 1 << 23; - - static const Type tINTEL = 1 << 24; - static const Type tAMD = 1 << 25; - - static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb - static const Type tRDRAND = 1 << 27; - static const Type tADX = 1 << 28; // adcx, adox - static const Type tRDSEED = 1 << 29; // rdseed - static const Type tSMAP = 1 << 30; // stac - static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest - static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort - static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph - static const Type tMOVBE = uint64(1) << 34; // mobve - - Cpu() - : type_(NONE) - { - unsigned int data[4]; - getCpuid(0, data); - const unsigned int maxNum = data[0]; - static const char intel[] = "ntel"; - static const char amd[] = "cAMD"; - if (data[2] == get32bitAsBE(amd)) { - type_ |= tAMD; - getCpuid(0x80000001, data); - 
if (data[3] & (1U << 31)) type_ |= t3DN; - if (data[3] & (1U << 15)) type_ |= tCMOV; - if (data[3] & (1U << 30)) type_ |= tE3DN; - if (data[3] & (1U << 22)) type_ |= tMMX2; - if (data[3] & (1U << 27)) type_ |= tRDTSCP; - } - if (data[2] == get32bitAsBE(intel)) { - type_ |= tINTEL; - getCpuid(0x80000001, data); - if (data[3] & (1U << 27)) type_ |= tRDTSCP; - if (data[2] & (1U << 5)) type_ |= tLZCNT; - } - getCpuid(1, data); - if (data[2] & (1U << 0)) type_ |= tSSE3; - if (data[2] & (1U << 9)) type_ |= tSSSE3; - if (data[2] & (1U << 19)) type_ |= tSSE41; - if (data[2] & (1U << 20)) type_ |= tSSE42; - if (data[2] & (1U << 22)) type_ |= tMOVBE; - if (data[2] & (1U << 23)) type_ |= tPOPCNT; - if (data[2] & (1U << 25)) type_ |= tAESNI; - if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ; - if (data[2] & (1U << 27)) type_ |= tOSXSAVE; - if (data[2] & (1U << 30)) type_ |= tRDRAND; - if (data[2] & (1U << 29)) type_ |= tF16C; - - if (data[3] & (1U << 15)) type_ |= tCMOV; - if (data[3] & (1U << 23)) type_ |= tMMX; - if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; - if (data[3] & (1U << 26)) type_ |= tSSE2; - - if (type_ & tOSXSAVE) { - // check XFEATURE_ENABLED_MASK[2:1] = '11b' - uint64 bv = getXfeature(); - if ((bv & 6) == 6) { - if (data[2] & (1U << 28)) type_ |= tAVX; - if (data[2] & (1U << 12)) type_ |= tFMA; - } - } - if (maxNum >= 7) { - getCpuidEx(7, 0, data); - if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2; - if (data[1] & (1U << 3)) type_ |= tBMI1; - if (data[1] & (1U << 8)) type_ |= tBMI2; - if (data[1] & (1U << 9)) type_ |= tENHANCED_REP; - if (data[1] & (1U << 18)) type_ |= tRDSEED; - if (data[1] & (1U << 19)) type_ |= tADX; - if (data[1] & (1U << 20)) type_ |= tSMAP; - if (data[1] & (1U << 4)) type_ |= tHLE; - if (data[1] & (1U << 11)) type_ |= tRTM; - } - setFamily(); - } - void putFamily() - { - printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", - family, model, stepping, extFamily, extModel); - printf("display:family=%X, model=%X\n", 
displayFamily, displayModel); - } - bool has(Type type) const - { - return (type & type_) != 0; - } -}; - -class Clock { -public: - static inline uint64 getRdtsc() - { -#ifdef _MSC_VER - return __rdtsc(); -#else - unsigned int eax, edx; - __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); - return ((uint64)edx << 32) | eax; -#endif - } - Clock() - : clock_(0) - , count_(0) - { - } - void begin() - { - clock_ -= getRdtsc(); - } - void end() - { - clock_ += getRdtsc(); - count_++; - } - int getCount() const { return count_; } - uint64 getClock() const { return clock_; } - void clear() { count_ = 0; clock_ = 0; } -private: - uint64 clock_; - int count_; -}; - -#ifdef XBYAK64 -const int UseRCX = 1 << 6; -const int UseRDX = 1 << 7; - -class Pack { - static const size_t maxTblNum = 10; - const Xbyak::Reg64 *tbl_[maxTblNum]; - size_t n_; -public: - Pack() : n_(0) {} - Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); } - Pack(const Pack& rhs) - : n_(rhs.n_) - { - if (n_ > maxTblNum) throw Error(ERR_INTERNAL); - for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i]; - } - Pack(const Xbyak::Reg64& t0) - { n_ = 1; tbl_[0] = &t0; } - Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; } - Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; } - Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; } - Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; } - Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; 
tbl_[4] = &t4; tbl_[5] = &t5; } - Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; } - Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; } - Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; } - Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) - { n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; } - Pack& append(const Xbyak::Reg64& t) - { - if (n_ == 10) { - fprintf(stderr, "ERR Pack::can't append\n"); - throw Error(ERR_BAD_PARAMETER); - } - tbl_[n_++] = &t; - return *this; - } - void init(const Xbyak::Reg64 *tbl, size_t n) - { - if (n > maxTblNum) { - fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); - throw Error(ERR_BAD_PARAMETER); - } - n_ = n; - for (size_t i = 0; i < n; i++) { - tbl_[i] = &tbl[i]; - } - } - const Xbyak::Reg64& operator[](size_t n) const - { - if (n >= n_) { - fprintf(stderr, "ERR Pack bad n=%d\n", (int)n); - throw 
Error(ERR_BAD_PARAMETER); - } - return *tbl_[n]; - } - size_t size() const { return n_; } - /* - get tbl[pos, pos + num) - */ - Pack sub(size_t pos, size_t num = size_t(-1)) const - { - if (num == size_t(-1)) num = n_ - pos; - if (pos + num > n_) { - fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num); - throw Error(ERR_BAD_PARAMETER); - } - Pack pack; - pack.n_ = num; - for (size_t i = 0; i < num; i++) { - pack.tbl_[i] = tbl_[pos + i]; - } - return pack; - } - void put() const - { - for (size_t i = 0; i < n_; i++) { - printf("%s ", tbl_[i]->toString()); - } - printf("\n"); - } -}; - -class StackFrame { -#ifdef XBYAK64_WIN - static const int noSaveNum = 6; - static const int rcxPos = 0; - static const int rdxPos = 1; -#else - static const int noSaveNum = 8; - static const int rcxPos = 3; - static const int rdxPos = 2; -#endif - Xbyak::CodeGenerator *code_; - int pNum_; - int tNum_; - bool useRcx_; - bool useRdx_; - int saveNum_; - int P_; - bool makeEpilog_; - Xbyak::Reg64 pTbl_[4]; - Xbyak::Reg64 tTbl_[10]; - Pack p_; - Pack t_; - StackFrame(const StackFrame&); - void operator=(const StackFrame&); -public: - const Pack& p; - const Pack& t; - /* - make stack frame - @param sf [in] this - @param pNum [in] num of function parameter(0 <= pNum <= 4) - @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX) - @param stackSizeByte [in] local stack size - @param makeEpilog [in] automatically call close() if true - - you can use - rax - gp0, ..., gp(pNum - 1) - gt0, ..., gt(tNum-1) - rcx if tNum & UseRCX - rdx if tNum & UseRDX - rsp[0..stackSizeByte - 1] - */ - StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true) - : code_(code) - , pNum_(pNum) - , tNum_(tNum & ~(UseRCX | UseRDX)) - , useRcx_((tNum & UseRCX) != 0) - , useRdx_((tNum & UseRDX) != 0) - , saveNum_(0) - , P_(0) - , makeEpilog_(makeEpilog) - , p(p_) - , t(t_) - { - using namespace Xbyak; - if (pNum < 0 || pNum 
> 4) throw Error(ERR_BAD_PNUM); - const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); - if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM); - const Reg64& _rsp = code->rsp; - const AddressFrame& _ptr = code->ptr; - saveNum_ = (std::max)(0, allRegNum - noSaveNum); - const int *tbl = getOrderTbl() + noSaveNum; - P_ = saveNum_ + (stackSizeByte + 7) / 8; - if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment - P_ *= 8; - if (P_ > 0) code->sub(_rsp, P_); -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i])); - } - for (int i = 4; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); - } -#else - for (int i = 0; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); - } -#endif - int pos = 0; - for (int i = 0; i < pNum; i++) { - pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); - } - for (int i = 0; i < tNum_; i++) { - tTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); - } - if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx); - if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx); - p_.init(pTbl_, pNum); - t_.init(tTbl_, tNum_); - } - /* - make epilog manually - @param callRet [in] call ret() if true - */ - void close(bool callRet = true) - { - using namespace Xbyak; - const Reg64& _rsp = code_->rsp; - const AddressFrame& _ptr = code_->ptr; - const int *tbl = getOrderTbl() + noSaveNum; -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]); - } - for (int i = 4; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); - } -#else - for (int i = 0; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); - } -#endif - if (P_ > 0) code_->add(_rsp, P_); - - if (callRet) code_->ret(); - } - 
~StackFrame() - { - if (!makeEpilog_) return; - try { - close(); - } catch (std::exception& e) { - printf("ERR:StackFrame %s\n", e.what()); - exit(1); - } catch (...) { - printf("ERR:StackFrame otherwise\n"); - exit(1); - } - } -private: - const int *getOrderTbl() const - { - using namespace Xbyak; - static const int tbl[] = { -#ifdef XBYAK64_WIN - Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI, -#else - Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, -#endif - Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15 - }; - return &tbl[0]; - } - int getRegIdx(int& pos) const - { - assert(pos < 14); - using namespace Xbyak; - const int *tbl = getOrderTbl(); - int r = tbl[pos++]; - if (useRcx_) { - if (r == Operand::RCX) { return Operand::R10; } - if (r == Operand::R10) { r = tbl[pos++]; } - } - if (useRdx_) { - if (r == Operand::RDX) { return Operand::R11; } - if (r == Operand::R11) { return tbl[pos++]; } - } - return r; - } -}; -#endif - -} } // end of util -#endif